From 69294c290bad83470e55046f02e2d1c6167150cb Mon Sep 17 00:00:00 2001
From: Petrichor Park
Date: Sun, 11 Feb 2024 11:06:01 -0600
Subject: [PATCH] vkd3d-shader/hlsl: Implement acos and asin trig intrinsics.

Tests have already been implemented in 92044d5e; this commit also
reduces the scope of some of the todos (because now they're
implemented!).

Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154
---
 libs/vkd3d-shader/hlsl.y            | 57 +++++++++++++++++++++++++++++
 tests/hlsl/inverse-trig.shader_test | 24 ++++++------
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index dc7b4c1b..111fec17 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -2666,6 +2666,55 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx,
     return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, params->args[0], loc);
 }
 
+static bool write_acos_or_asin(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_type *type;
+    char *body;
+
+    static const char template[] =
+            "%s %s(%s x)\n"
+            "{\n"
+            " %s abs_arg = abs(x);\n"
+            " %s poly_approx = (((-0.018729\n"
+            " * abs_arg + 0.074261)\n"
+            " * abs_arg - 0.212114)\n"
+            " * abs_arg + 1.570729);\n"
+            " %s correction = sqrt(1.0 - abs_arg);\n"
+            " %s zero_flip = (x < 0.0) * (-2.0 * correction * poly_approx + 3.141593);\n"
+            " %s result = poly_approx * correction + zero_flip;\n"
+            " return %s;\n"
+            "}";
+    static const char fn_name_acos[] = "acos";
+    static const char fn_name_asin[] = "asin";
+    static const char return_stmt_acos[] = "result";
+    static const char return_stmt_asin[] = "-result + 1.570796";
+
+    const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos;
+
+    type = params->args[0]->data_type;
+    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+
+    if (!(body = hlsl_sprintf_alloc(ctx, template,
+            type->name, fn_name, type->name,
+            type->name, type->name, type->name, type->name, type->name,
+            (asin_mode ? return_stmt_asin : return_stmt_acos))))
+        return false;
+    func = hlsl_compile_internal_function(ctx, fn_name, body);
+    vkd3d_free(body);
+    if (!func)
+        return false;
+
+    return add_user_call(ctx, func, params, loc);
+}
+
+static bool intrinsic_acos(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return write_acos_or_asin(ctx, params, loc, false);
+}
+
 static bool intrinsic_all(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -2743,6 +2792,12 @@ static bool intrinsic_any(struct hlsl_ctx *ctx,
     return false;
 }
 
+static bool intrinsic_asin(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return write_acos_or_asin(ctx, params, loc, true);
+}
+
 /* Find the type corresponding to the given source type, with the same
  * dimensions but a different base type. */
 static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx,
@@ -3970,9 +4025,11 @@ intrinsic_functions[] =
     /* Note: these entries should be kept in alphabetical order. */
     {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4},
     {"abs", 1, true, intrinsic_abs},
+    {"acos", 1, true, intrinsic_acos},
     {"all", 1, true, intrinsic_all},
     {"any", 1, true, intrinsic_any},
     {"asfloat", 1, true, intrinsic_asfloat},
+    {"asin", 1, true, intrinsic_asin},
     {"asuint", -1, true, intrinsic_asuint},
     {"ceil", 1, true, intrinsic_ceil},
     {"clamp", 3, true, intrinsic_clamp},
diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test
index 0c1fdc01..01344204 100644
--- a/tests/hlsl/inverse-trig.shader_test
+++ b/tests/hlsl/inverse-trig.shader_test
@@ -3,7 +3,7 @@
 % implementations. DXIL defines intrinsics for inverse trig, to be implemented
 % by the backend.
 
-[pixel shader todo]
+[pixel shader todo(sm<4)]
 uniform float4 a;
 
 float4 main() : sv_target
@@ -13,26 +13,26 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float4 -1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (3.14159274, 0.0, 0.0, 0.0) 128
 
 uniform 0 float4 -0.5 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (2.094441441, 0.0, 0.0, 0.0) 256
 
 uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (1.57072878, 0.0, 0.0, 0.0) 1024
 
 uniform 0 float4 0.5 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (1.04715133, 0.0, 0.0, 0.0) 512
 
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0) 128
 
-[pixel shader todo]
+[pixel shader todo(sm<4)]
 uniform float4 a;
 
 float4 main() : sv_target
@@ -44,7 +44,7 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float4 -1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (-31416.0, 0.0, 0.0, 0.0)
 
 [require]
@@ -52,15 +52,15 @@ shader model < 6.0
 
 [test]
 uniform 0 float4 -0.5 0.0 0.0 0.0
-todo draw quad
+todo(sm<4) draw quad
 probe all rgba (-10473.0, 0.0, 0.0, 0.0)
 
 uniform 0 float4 0.0 0.0 0.0 0.0
-todo draw quad
+todo(sm<4) draw quad
 probe all rgba (1.0, 0.0, 0.0, 0.0)
 
 uniform 0 float4 0.5 0.0 0.0 0.0
-todo draw quad
+todo(sm<4) draw quad
 probe all rgba (10473.0, 0.0, 0.0, 0.0)
 
 [require]
@@ -88,7 +88,7 @@ probe all rgba (10472.0, 0.0, 0.0, 0.0) 4096
 
 [test]
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (31416.0, 0.0, 0.0, 0.0)