vkd3d-shader/hlsl: Support smoothstep() intrinsic.

Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Zebediah Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/53
2025-04-13 05:43:18 -07:00 · 2022-11-03 20:54:48 -03:00 · 2023-01-11 22:39:00 +01:00
parent 09e7218539
commit a7bb5a0835
3 changed files with 244 additions and 0 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -102,6 +102,7 @@ vkd3d_shader_tests = \
 	tests/hlsl-return-void.shader_test \
 	tests/hlsl-shape.shader_test \
 	tests/hlsl-single-numeric-initializer.shader_test \
+	tests/hlsl-smoothstep.shader_test \
 	tests/hlsl-state-block-syntax.shader_test \
 	tests/hlsl-static-initializer.shader_test \
 	tests/hlsl-storage-qualifiers.shader_test \
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -2596,6 +2596,82 @@ static bool intrinsic_saturate(struct hlsl_ctx *ctx,
    return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, arg, loc);
 }

+/* smoothstep(a, b, x) = p^2 (3 - 2p), where p = saturate((x - a)/(b - a))
+ *
+ * Handler for the three-argument smoothstep() intrinsic. params->args[0] is
+ * a (min_arg), params->args[1] is b (max_arg) and params->args[2] is x.
+ *
+ * All three arguments are implicitly converted to one float type. Its class
+ * and dimensions come from expr_common_shape(), applied first to the types of
+ * a and b, and then to that intermediate float type and the type of x.
+ *
+ * The expression is then built from these operations, in this order:
+ *   -a;  x - a and b - a, both as additions of -a;  1 / (b - a);
+ *   p = saturate((x - a) * (1 / (b - a)));  -2p;  3 - 2p;  p * p;
+ *   p^2 * (3 - 2p).
+ * The quotient is therefore a multiplication by a reciprocal rather than a
+ * single division, and nothing here guards against b == a.
+ *
+ * The float constants 1, -2 and 3 are appended to params->instrs directly;
+ * every other node is created by the add_*() helpers, which are passed the
+ * same list.
+ *
+ * Returns false as soon as any helper fails, true otherwise. The final node
+ * is only stored in the local "res" and is not handed back explicitly.
+ * NOTE(review): presumably the caller picks the result up from
+ * params->instrs - confirm. */
+static bool intrinsic_smoothstep(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *min_arg, *max_arg, *x_arg, *p, *p_num, *p_denom, *res;
+    struct hlsl_ir_constant *one, *minus_two, *three;
+    enum hlsl_type_class common_class;
+    struct hlsl_type *common_type;
+    unsigned int dimx, dimy;

+    min_arg = params->args[0]; /* a */
+    max_arg = params->args[1]; /* b */
+    x_arg = params->args[2]; /* x */

+    if (!expr_common_shape(ctx, min_arg->data_type, max_arg->data_type, loc, &common_class, &dimx, &dimy))
+        return false;
+    common_type = hlsl_get_numeric_type(ctx, common_class, HLSL_TYPE_FLOAT, dimx, dimy);

+    if (!expr_common_shape(ctx, common_type, x_arg->data_type, loc, &common_class, &dimx, &dimy))
+        return false;
+    common_type = hlsl_get_numeric_type(ctx, common_class, HLSL_TYPE_FLOAT, dimx, dimy);

+    if (!(min_arg = add_implicit_conversion(ctx, params->instrs, min_arg, common_type, loc)))
+        return false;

+    if (!(max_arg = add_implicit_conversion(ctx, params->instrs, max_arg, common_type, loc)))
+        return false;

+    if (!(x_arg = add_implicit_conversion(ctx, params->instrs, x_arg, common_type, loc)))
+        return false;

+    if (!(min_arg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, min_arg, loc)))
+        return false;

+    if (!(p_num = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, x_arg, min_arg, loc)))
+        return false;

+    if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, max_arg, min_arg, loc)))
+        return false;

+    if (!(one = hlsl_new_float_constant(ctx, 1.0, loc)))
+        return false;
+    list_add_tail(params->instrs, &one->node.entry);

+    if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, &one->node, p_denom, loc)))
+        return false;

+    if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p_num, p_denom, loc)))
+        return false;

+    if (!(p = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SAT, p, loc)))
+        return false;

+    if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc)))
+        return false;
+    list_add_tail(params->instrs, &minus_two->node.entry);

+    if (!(three = hlsl_new_float_constant(ctx, 3.0, loc)))
+        return false;
+    list_add_tail(params->instrs, &three->node.entry);

+    if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, &minus_two->node, p, loc)))
+        return false;

+    if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, &three->node, res, loc)))
+        return false;

+    if (!(p = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, p, loc)))
+        return false;

+    if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, p, res, loc)))
+        return false;

+    return true;
+}
+
 static bool intrinsic_transpose(struct hlsl_ctx *ctx,
        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -2681,6 +2757,7 @@ intrinsic_functions[] =
    {"pow",                                 2, true,  intrinsic_pow},
    {"round",                               1, true,  intrinsic_round},
    {"saturate",                            1, true,  intrinsic_saturate},
+    {"smoothstep",                          3, true,  intrinsic_smoothstep},
    {"transpose",                           1, true,  intrinsic_transpose},
 };

--- a/tests/hlsl-smoothstep.shader_test
+++ b/tests/hlsl-smoothstep.shader_test
@@ -0,0 +1,166 @@
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float4 a = {1, -1, -1, 10};
+    float4 b = {2, 1, 1, 20};
+    float4 x = {0.3, 0.4, 2, 15.4}; // (x - a) / (b - a) = -0.7, 0.7, 1.5, 0.54

+    return smoothstep(a, b, x); // float4 edges and float4 x: below, inside and above the [0, 1] range
+}

+[test]
+draw quad
+probe all rgba (0, 0.784, 1.0, 0.559872) 1
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float a = 1;
+    float b = 2;
+    float4 x = {0.9, 1.2, 1.8, 2.1}; // (x - a) / (b - a) = -0.1, 0.2, 0.8, 1.1

+    return smoothstep(a, b, x); // scalar edges combined with a float4 x
+}

+[test]
+draw quad
+probe all rgba (0, 0.104, 0.896, 1.000000) 5
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float4 a = {1, 10, 100, 1000};
+    float4 b = {2, 20, 200, 2000};
+    float x = 14; // (x - a) / (b - a) = 13, 0.4, -0.86, -0.986

+    return smoothstep(a, b, x); // float4 edges combined with a scalar x
+}

+[test]
+draw quad
+probe all rgba (1.0, 0.352, 0, 0) 1
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float2 a = {1, 10};
+    float3 b = {2, 20, 200};
+    float4 x = {1.4, 14, 140, 1400}; // first two components: (x - a) / (b - a) = 0.4, 0.4

+    float2 res = smoothstep(a, b, x); // float2, float3 and float4 arguments; only two components are kept
+    return float4(res, 0, 0);
+}

+[test]
+draw quad
+probe all rgba (0.352, 0.352, 0, 0) 1
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float3 a = {1, 10, 100};
+    float2 b = {2, 20};
+    float4 x = {1.4, 14, 140, 1400}; // first two components: (x - a) / (b - a) = 0.4, 0.4

+    float2 res = smoothstep(a, b, x); // float3, float2 and float4 arguments; only two components are kept
+    return float4(res, 0, 0);
+}

+[test]
+draw quad
+probe all rgba (0.352, 0.352, 0, 0) 1
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float4 a = {1, 10, 100, 1000};
+    float4 b = {2, 20, 200, 2000};
+    float2 x = {14, 140}; // against the first two edge components: (x - a) / (b - a) = 13, 13

+    float2 res = smoothstep(a, b, x); // float4 edges combined with a float2 x
+    return float4(res, 0, 0);
+}

+[test]
+draw quad
+probe all rgba (1.0, 1.0, 0, 0) 1
+
+
+[pixel shader todo]
+float4 main() : sv_target
+{
+    float2x3 a = {1, 1, 1, 1, 1, 1};
+    float3x2 b = {2, 2, 2, 2, 2, 2};
+    float4x2 x = {1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8}; // the probed values correspond to x = 1.1, 1.2, 1.3, 1.4

+    float2x2 r = smoothstep(a, b, x); // three matrix arguments of different shapes, stored in a float2x2
+    return r;
+}

+[test]
+todo draw quad
+todo probe all rgba (0.028, 0.104, 0.216, 0.352) 1
+
+
+[pixel shader]
+// 4 division by zero warnings.
+// Only test compilation because result is implementation-dependent.
+float4 main() : sv_target
+{
+    float4 a = {0, 0, 0, 0};
+    float4 b = {-1, -1, 0, 0}; // b - a = -1, -1, 0, 0: zero in the last two components
+    float4 x = {0, -0.25, 0, 1};

+    return smoothstep(a, b, x); // no [test] section follows this shader, so nothing is drawn or probed
+}
+
+
+[pixel shader]
+float4 main() : sv_target
+{
+    float4x1 a = {0.0, 0.0, 0.0, 0.0};
+    float b = 1.0;
+    float3x1 x = {0.5, 0.5, 0.5}; // (x - a) / (b - a) = 0.5 in every component

+    float3x1 r = smoothstep(a, b, x); // float4x1 and float3x1 matrices combined with a scalar edge
+    return float4(r, 0);
+}

+[test]
+draw quad
+probe all rgba (0.5, 0.5, 0.5, 0.0)
+
+
+[pixel shader todo]
+float4 main() : sv_target
+{
+    float4x1 a = {0.0, 0.0, 0.0, 0.0};
+    float2x2 b = {1.0, 1.0, 1.0, 1.0};
+    float3x1 x = {0.5, 0.5, 0.5}; // (x - a) / (b - a) = 0.5 in every component

+    float2x1 r = smoothstep(a, b, x); // float4x1, float2x2 and float3x1 arguments, stored in a float2x1
+    return float4(r, r);
+}

+[test]
+todo draw quad
+todo probe all rgba (0.5, 0.5, 0.5, 0.5)
+
+
+[pixel shader fail todo]
+float4 main() : sv_target
+{
+    float2x2 a = {0.0, 0.0, 0.0, 0.0};
+    float4 b = 1.0; // the only vector among the three arguments; the other two are float2x2
+    float2x2 x = {0.5, 0.5, 0.5, 0.5};

+    smoothstep(a, b, x); // NOTE(review): the "fail" tag on the section header presumably means this must not compile - confirm
+    return 0;
+}