From 69294c290bad83470e55046f02e2d1c6167150cb Mon Sep 17 00:00:00 2001
From: Petrichor Park <ppark@codeweavers.com>
Date: Sun, 11 Feb 2024 11:06:01 -0600
Subject: [PATCH] vkd3d-shader/hlsl: Implement acos and asin trig intrinsics.

Tests were already added in 92044d5e; this commit also narrows their todo
markers to sm<4, because the intrinsics are now implemented.

Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=55154
---
 libs/vkd3d-shader/hlsl.y            | 57 +++++++++++++++++++++++++++++
 tests/hlsl/inverse-trig.shader_test | 24 ++++++------
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index dc7b4c1b..111fec17 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -2666,6 +2666,55 @@ static bool intrinsic_abs(struct hlsl_ctx *ctx,
     return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, params->args[0], loc);
 }
 
+static bool write_acos_or_asin(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_type *type;
+    char *body; /* Formatted HLSL source of the helper. */
+    /* Implements acos() (asin_mode false) or asin() (asin_mode true) via an HLSL helper built from this template. */
+    static const char template[] =
+            "%s %s(%s x)\n"
+            "{\n"
+            "    %s abs_arg = abs(x);\n"
+            "    %s poly_approx = (((-0.018729\n" /* Cubic in abs_arg, written in Horner form. */
+            "        * abs_arg + 0.074261)\n"
+            "        * abs_arg - 0.212114)\n"
+            "        * abs_arg + 1.570729);\n"
+            "    %s correction = sqrt(1.0 - abs_arg);\n"
+            "    %s zero_flip = (x < 0.0) * (-2.0 * correction * poly_approx + 3.141593);\n"
+            "    %s result = poly_approx * correction + zero_flip;\n" /* x < 0: 3.141593 - poly_approx * correction. */
+            "    return %s;\n" /* Replaced by return_stmt_acos or return_stmt_asin. */
+            "}";
+    static const char fn_name_acos[] = "acos";
+    static const char fn_name_asin[] = "asin";
+    static const char return_stmt_acos[] = "result";
+    static const char return_stmt_asin[] = "-result + 1.570796"; /* ~pi/2 minus the acos result. */
+
+    const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos;
+    /* Request the HLSL_TYPE_FLOAT type with the argument's class and dimensions; the helper is typed with it. */
+    type = params->args[0]->data_type;
+    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+
+    if (!(body = hlsl_sprintf_alloc(ctx, template,
+            type->name, fn_name, type->name,
+            type->name, type->name, type->name, type->name, type->name,
+            (asin_mode ? return_stmt_asin : return_stmt_acos))))
+        return false;
+    func = hlsl_compile_internal_function(ctx, fn_name, body);
+    vkd3d_free(body); /* Released whether or not compilation succeeded. */
+    if (!func)
+        return false;
+    /* Hand the compiled helper and the original arguments to add_user_call(); its result is returned. */
+    return add_user_call(ctx, func, params, loc);
+}
+
+static bool intrinsic_acos(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return write_acos_or_asin(ctx, params, loc, false); /* asin_mode = false selects acos. */
+}
+
 static bool intrinsic_all(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -2743,6 +2792,12 @@ static bool intrinsic_any(struct hlsl_ctx *ctx,
     return false;
 }
 
+static bool intrinsic_asin(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return write_acos_or_asin(ctx, params, loc, true); /* asin_mode = true selects asin. */
+}
+
 /* Find the type corresponding to the given source type, with the same
  * dimensions but a different base type. */
 static struct hlsl_type *convert_numeric_type(const struct hlsl_ctx *ctx,
@@ -3970,9 +4025,11 @@ intrinsic_functions[] =
     /* Note: these entries should be kept in alphabetical order. */
     {"D3DCOLORtoUBYTE4",                    1, true,  intrinsic_d3dcolor_to_ubyte4},
     {"abs",                                 1, true,  intrinsic_abs},
+    {"acos",                                1, true,  intrinsic_acos},
     {"all",                                 1, true,  intrinsic_all},
     {"any",                                 1, true,  intrinsic_any},
     {"asfloat",                             1, true,  intrinsic_asfloat},
+    {"asin",                                1, true,  intrinsic_asin},
     {"asuint",                             -1, true,  intrinsic_asuint},
     {"ceil",                                1, true,  intrinsic_ceil},
     {"clamp",                               3, true,  intrinsic_clamp},
diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test
index 0c1fdc01..01344204 100644
--- a/tests/hlsl/inverse-trig.shader_test
+++ b/tests/hlsl/inverse-trig.shader_test
@@ -3,7 +3,7 @@
 % implementations. DXIL defines intrinsics for inverse trig, to be implemented
 % by the backend.
 
-[pixel shader todo]
+[pixel shader todo(sm<4)]
 uniform float4 a;
 
 float4 main() : sv_target
@@ -13,26 +13,26 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float4 -1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (3.14159274, 0.0, 0.0, 0.0) 128
 
 uniform 0 float4 -0.5 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (2.094441441, 0.0, 0.0, 0.0) 256
 
 uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (1.57072878, 0.0, 0.0, 0.0) 1024
 
 uniform 0 float4 0.5 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (1.04715133, 0.0, 0.0, 0.0) 512
 
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0) 128
 
-[pixel shader todo]
+[pixel shader todo(sm<4)]
 uniform float4 a;
 
 float4 main() : sv_target
@@ -44,7 +44,7 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float4 -1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (-31416.0, 0.0, 0.0, 0.0)
 
 [require]
@@ -52,15 +52,15 @@ shader model < 6.0
 
 [test]
 uniform 0 float4 -0.5 0.0 0.0 0.0
-todo draw quad
+todo(sm<4) draw quad
 probe all rgba (-10473.0, 0.0, 0.0, 0.0)
 
 uniform 0 float4 0.0 0.0 0.0 0.0
-todo draw quad
+todo(sm<4) draw quad
 probe all rgba (1.0, 0.0, 0.0, 0.0)
 
 uniform 0 float4 0.5 0.0 0.0 0.0
-todo draw quad
+todo(sm<4) draw quad
 probe all rgba (10473.0, 0.0, 0.0, 0.0)
 
 [require]
@@ -88,7 +88,7 @@ probe all rgba (10472.0, 0.0, 0.0, 0.0) 4096
 
 [test]
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<6) draw quad
+todo(sm<4) draw quad
 probe all rgba (31416.0, 0.0, 0.0, 0.0)