vkd3d-shader/hlsl: Lower SLT instructions for pixel shaders.

The shader todo(sm<4) qualifiers are removed from the inverse-trig.shader_test tests, but properly passing those tests also requires making spirv.c capable of handling ABS. The same applies to the ps_3_0 equality test in float-comparison.shader_test.
Approved-by: Zebediah Figura (@zfigura) Approved-by: Giovanni Mascellani (@giomasce) Approved-by: Henri Verbeet (@hverbeet) Approved-by: Alexandre Julliard (@julliard) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/706
2025-04-13 05:43:18 -07:00 · 2023-11-01 17:07:46 -03:00 · 2024-03-14 23:24:02 +01:00
parent 4133c4d801
commit 5c986b9cde
11 changed files with 113 additions and 38 deletions
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp
    return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
 }

+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+        struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3)
+{
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3};
+    /* Builds the three-operand expression "op"; all three operands must share one data type. */
+    assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type));
+    assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type));
+    return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); /* type and location of arg1 */
+}
+
 struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
        struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
 {
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@ -1248,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
 struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
 struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
        enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+        struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);

 void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);

--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@ -3141,6 +3141,67 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node
    return true;
 }

+/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to
+ * CMP instructions (only available in pixel shaders). Returns false if instr is not an SLT expression
+ * or if a new node cannot be created, true otherwise. Based on the following equivalence:
+ *     SLT(x, y)
+ *     = (x < y) ? 1.0 : 0.0
+ *     = ((x - y) >= 0) ? 0.0 : 1.0
+ *     = CMP(x - y, 0.0, 1.0)
+ */
+static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp;
+    struct hlsl_constant_value zero_value, one_value;
+    struct hlsl_type *float_type;
+    struct hlsl_ir_expr *expr;
+
+    if (instr->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(instr);
+    if (expr->op != HLSL_OP2_SLT)
+        return false;
+    /* Both operands are cast to a float vector type sized by the dimx of the instruction's type. */
+    arg1 = expr->operands[0].node;
+    arg2 = expr->operands[1].node;
+    float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+    if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, arg1_cast);
+
+    if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, arg2_cast);
+    /* x - y is built as x + (-y): a NEG of the second operand followed by an ADD. */
+    if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, neg);
+
+    if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg)))
+        return false;
+    hlsl_block_add_instr(block, sub);
+
+    memset(&zero_value, 0, sizeof(zero_value)); /* zero-fills the whole constant value */
+    if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, zero);
+    /* Fill u[0..3] with 1.0; unlike zero_value, one_value is not memset first. */
+    one_value.u[0].f = 1.0;
+    one_value.u[1].f = 1.0;
+    one_value.u[2].f = 1.0;
+    one_value.u[3].f = 1.0;
+    if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, one);
+    /* CMP(x - y, 0.0, 1.0): per the equivalence above, 0.0 where x - y >= 0 and 1.0 otherwise. */
+    if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one)))
+        return false;
+    hlsl_block_add_instr(block, cmp);
+
+    return true;
+}
+
 static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
    struct hlsl_type *type = instr->data_type, *arg_type;
@ -5331,6 +5392,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
        lower_ir(ctx, lower_ceil, body);
        lower_ir(ctx, lower_floor, body);
        lower_ir(ctx, lower_comparison_operators, body);
+        if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+            lower_ir(ctx, lower_slt, body);
    }

    if (profile->major_version < 2)
--- a/tests/hlsl/any.shader_test
+++ b/tests/hlsl/any.shader_test
@ -1,4 +1,4 @@
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 f;

 float4 main() : sv_target
@ -8,28 +8,28 @@ float4 main() : sv_target

 [test]
 uniform 0 float4 1.0 1.0 1.0 1.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
 uniform 0 float4 0.0 1.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
 uniform 0 float4 0.0 0.0 1.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
 uniform 0 float4 0.0 0.0 0.0 1.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
 uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)
 uniform 0 float4 -1.0 -1.0 -1.0 -1.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float f;

 float4 main() : sv_target
@ -39,13 +39,13 @@ float4 main() : sv_target

 [test]
 uniform 0 float4 1.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)
 uniform 0 float4 0.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)
 uniform 0 float4 -1.0 0.0 0.0 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)


--- a/tests/hlsl/expr-indexing.shader_test
+++ b/tests/hlsl/expr-indexing.shader_test
@ -13,7 +13,7 @@ draw quad
 probe all rgba (8.0, 8.0, 8.0, 8.0)


-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a, b;
 float i;

@ -26,7 +26,7 @@ float4 main() : sv_target
 uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float4 5.0 6.0 7.0 8.0
 uniform 8 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (10.0, 10.0, 10.0, 10.0)


@ -44,7 +44,7 @@ draw quad
 probe all rgba (3.0, 3.0, 3.0, 3.0)


-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a;
 float i;

@ -56,10 +56,10 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float 0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (4.0, 4.0, 4.0, 4.0)
 uniform 4 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 1.0)


@ -82,7 +82,7 @@ draw quad
 probe all rgba (4.0, 4.0, 4.0, 4.0)


-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a;
 float i;

@ -99,5 +99,5 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float 1
-todo(sm<4) draw quad
+draw quad
 probe all rgba (2.0, 2.0, 2.0, 2.0)
--- a/tests/hlsl/float-comparison.shader_test
+++ b/tests/hlsl/float-comparison.shader_test
@ -1,5 +1,5 @@
 % Check that -0.0f is not less than 0.0f
-[pixel shader todo(sm<4)]
+[pixel shader]
 float a;

 float4 main() : sv_target
@ -9,7 +9,7 @@ float4 main() : sv_target

 [test]
 uniform 0 float 0.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0)


@ -77,7 +77,7 @@ if(sm>=6) probe all rgba (1010101.0, 11110000.0, 1101001.0, 1.0)
 shader model >= 2.0
 shader model < 3.0

-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a, b;

 float4 main() : sv_target
@ -88,7 +88,7 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 1e-37  1e-37 1e+38  1e+38
 uniform 4 float4     0 -1e-37 1e+38 -1e+38
-todo(sm<4) draw quad
+draw quad
 probe all rgba (1.0, 1.0, 1.0, 0.0)


@ -96,7 +96,7 @@ probe all rgba (1.0, 1.0, 1.0, 0.0)
 shader model >= 3.0
 shader model < 4.0

-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a, b;

 float4 main() : sv_target
--- a/tests/hlsl/inverse-trig.shader_test
+++ b/tests/hlsl/inverse-trig.shader_test
@ -3,7 +3,7 @@
 % implementations. DXIL defines intrinsics for inverse trig, to be implemented
 % by the backend.

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 a;

 float4 main() : sv_target
@ -32,7 +32,7 @@ uniform 0 float4 1.0 0.0 0.0 0.0
 todo(sm<4) draw quad
 probe all rgba (0.0, 0.0, 0.0, 0.0) 128

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 a;

 float4 main() : sv_target
--- a/tests/hlsl/matrix-indexing.shader_test
+++ b/tests/hlsl/matrix-indexing.shader_test
@ -108,7 +108,7 @@ draw quad
 probe all rgba (3.0, 4.0, 50.0, 60.0)


-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float i;

 float4 main() : sv_target
@ -120,8 +120,8 @@ float4 main() : sv_target

 [test]
 uniform 0 float 2
-todo(sm<4) draw quad
-probe all rgba (8, 9, 10, 11)
+draw quad
+todo(sm<4) probe all rgba (8, 9, 10, 11)


 [pixel shader todo(sm<4)]
--- a/tests/hlsl/non-const-indexing.shader_test
+++ b/tests/hlsl/non-const-indexing.shader_test
@ -25,7 +25,7 @@ todo(sm<4) draw quad
 probe all rgba (9.0, 10.0, 11.0, 12.0)


-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float i;

 float4 main() : SV_TARGET
@ -36,16 +36,16 @@ float4 main() : SV_TARGET

 [test]
 uniform 0 float 0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (11.0, 11.0, 11.0, 11.0)
 uniform 0 float 1
-todo(sm<4) draw quad
+draw quad
 probe all rgba (12.0, 12.0, 12.0, 12.0)
 uniform 0 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (13.0, 13.0, 13.0, 13.0)
 uniform 0 float 3
-todo(sm<4) draw quad
+draw quad
 probe all rgba (14.0, 14.0, 14.0, 14.0)


--- a/tests/hlsl/step.shader_test
+++ b/tests/hlsl/step.shader_test
@ -1,4 +1,4 @@
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 f, p;

 float4 main() : sv_target
@ -9,7 +9,7 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 5.0 -2.6 3.0 2.0
 uniform 4 float4 1.0 -4.3 3.0 4.0
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.0, 0.0, 1.0, 1.0)


--- a/tests/hlsl/vector-indexing-uniform.shader_test
+++ b/tests/hlsl/vector-indexing-uniform.shader_test
@ -1,6 +1,6 @@
 % Use a uniform to prevent the compiler from optimizing.

-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float i;
 float4 main() : SV_TARGET
 {
@ -12,5 +12,5 @@ float4 main() : SV_TARGET

 [test]
 uniform 0 float 2
-todo(sm<4) draw quad
+draw quad
 probe all rgba (0.5, 0.3, 0.8, 0.2)