vkd3d-shader/hlsl: Delay lowering complex casts until after parse time.

While so far it has been possible to do this at parse time, this must
happen after knowing if the complex cast is on the lhs or not.

The modified tests show that before this patch we are currently
miscompiling when this happens, because a complex lhs cast is transformed
into a load, and add_assignment() just stores to the generated "cast"
temp.
This commit is contained in:
Francisco Casas 2025-01-16 17:08:24 -03:00 committed by Henri Verbeet
parent dc37d90190
commit c2e224c5fb
Notes: Henri Verbeet 2025-01-22 15:04:09 +01:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1350
5 changed files with 111 additions and 93 deletions

View File

@ -359,79 +359,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl
if (src_type->class == HLSL_CLASS_NULL)
return node;
if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR)
{
unsigned int src_comp_count = hlsl_type_component_count(src_type);
unsigned int dst_comp_count = hlsl_type_component_count(dst_type);
struct hlsl_deref var_deref;
bool broadcast, matrix_cast;
struct hlsl_ir_load *load;
struct hlsl_ir_var *var;
unsigned int dst_idx;
broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1;
matrix_cast = !broadcast && dst_comp_count != src_comp_count
&& src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX;
VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast);
if (matrix_cast)
{
VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx);
VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy);
}
if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc)))
return NULL;
hlsl_init_simple_deref_from_var(&var_deref, var);
for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx)
{
struct hlsl_ir_node *component_load;
struct hlsl_type *dst_comp_type;
struct hlsl_block store_block;
unsigned int src_idx;
if (broadcast)
{
src_idx = 0;
}
else if (matrix_cast)
{
unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx;
src_idx = y * src_type->e.numeric.dimx + x;
}
else
{
src_idx = dst_idx;
}
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc)))
return NULL;
if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc)))
return NULL;
hlsl_block_add_instr(block, cast);
if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast))
return NULL;
hlsl_block_add_block(block, &store_block);
}
if (!(load = hlsl_new_var_load(ctx, var, loc)))
return NULL;
hlsl_block_add_instr(block, &load->node);
return &load->node;
}
else
{
if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc)))
return NULL;
hlsl_block_add_instr(block, cast);
return cast;
}
}
static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block,

View File

@ -1075,6 +1075,90 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h
return &coords_load->node;
}
/* IR pass callback: lower a cast involving a "complex" type (any class wider
 * than HLSL_CLASS_VECTOR, e.g. a matrix, array, or struct, on either side of
 * the cast) into per-component scalar casts. The cast expression is replaced
 * by component-wise loads from the source, scalar casts, and stores into a
 * synthetic "cast" variable, followed by a load of that variable as the
 * result. Returns true if the instruction was lowered, false if it was left
 * untouched. */
static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
    unsigned int src_comp_count, dst_comp_count;
    struct hlsl_type *src_type, *dst_type;
    struct hlsl_deref var_deref;
    bool broadcast, matrix_cast;
    struct hlsl_ir_load *load;
    struct hlsl_ir_node *arg;
    struct hlsl_ir_var *var;
    unsigned int dst_idx;

    /* Only HLSL_OP1_CAST expressions are candidates. */
    if (instr->type != HLSL_IR_EXPR)
        return false;
    if (hlsl_ir_expr(instr)->op != HLSL_OP1_CAST)
        return false;

    arg = hlsl_ir_expr(instr)->operands[0].node;
    dst_type = instr->data_type;
    src_type = arg->data_type;

    /* Scalar/vector to scalar/vector casts are not "complex"; leave those to
     * other passes. */
    if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR)
        return false;

    src_comp_count = hlsl_type_component_count(src_type);
    dst_comp_count = hlsl_type_component_count(dst_type);

    /* A broadcast replicates a single 1x1 numeric source component to every
     * destination component; a matrix cast truncates a matrix to a smaller
     * matrix type. */
    broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1;
    matrix_cast = !broadcast && dst_comp_count != src_comp_count
            && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX;
    VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast);
    if (matrix_cast)
    {
        VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx);
        VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy);
    }

    /* Synthesize a temporary variable of the destination type to assemble the
     * converted components into. */
    if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, &instr->loc)))
        return false;
    hlsl_init_simple_deref_from_var(&var_deref, var);

    for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx)
    {
        struct hlsl_ir_node *component_load, *cast;
        struct hlsl_type *dst_comp_type;
        struct hlsl_block store_block;
        unsigned int src_idx;

        if (broadcast)
        {
            src_idx = 0;
        }
        else if (matrix_cast)
        {
            /* Map the destination component index to the same (x, y) position
             * within the (larger) source matrix. */
            unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx;
            src_idx = y * src_type->e.numeric.dimx + x;
        }
        else
        {
            src_idx = dst_idx;
        }

        dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);

        /* Load the source component, cast it to the destination component
         * type, and store it into the synthetic variable. */
        if (!(component_load = hlsl_add_load_component(ctx, block, arg, src_idx, &arg->loc)))
            return false;
        if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, &arg->loc)))
            return false;
        hlsl_block_add_instr(block, cast);
        if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast))
            return false;
        hlsl_block_add_block(block, &store_block);
    }

    /* The load of the synthetic variable becomes the replacement value for
     * the original cast instruction. */
    if (!(load = hlsl_new_var_load(ctx, var, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, &load->node);
    return true;
}
/* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that
* represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of
* an assignment or as a value made from different components of the matrix. The former cases should
@ -6790,6 +6874,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
{
bool progress;
lower_ir(ctx, lower_complex_casts, body);
lower_ir(ctx, lower_matrix_swizzles, body);
lower_ir(ctx, lower_broadcasts, body);
@ -12084,6 +12169,7 @@ static void process_entry_function(struct hlsl_ctx *ctx,
while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
lower_ir(ctx, lower_complex_casts, body);
lower_ir(ctx, lower_matrix_swizzles, body);
lower_ir(ctx, lower_index_loads, body);

View File

@ -176,7 +176,7 @@ float4 main() : sv_target
}
[pixel shader fail(sm>=4) todo(sm>=4)]
[pixel shader fail(sm>=4) todo]
void fun(out float4 f)
{
f = float4(1.1, 2.3, 3.6, 4.3);
@ -191,5 +191,5 @@ float4 main() : sv_target
}
[test]
todo(sm>=4) draw quad
todo(sm<4) probe (0, 0) rgba(1.1, 2.3, 3.6, 4.3)
todo draw quad
probe (0, 0) rgba(1.1, 2.3, 3.6, 4.3)

View File

@ -30,7 +30,7 @@ probe (0, 0) rgba(-1, -2, 4, 0)
% Casts don't actually perform base type changes, only the outermost one, which
% I suspect is because of the implicit cast on the assignment and not the cast itself.
[pixel shader fail(sm>=4) todo(sm>=4)]
[pixel shader fail(sm>=4) todo]
float4 main() : sv_target
{
float4 f = 0;
@ -40,8 +40,8 @@ float4 main() : sv_target
}
[test]
todo(sm>=4) draw quad
todo probe (0, 0) rgba(1.3, -2.4, 3.3, 4.7)
todo draw quad
probe (0, 0) rgba(1.3, -2.4, 3.3, 4.7)
[pixel shader fail(sm>=4) todo]
@ -69,7 +69,7 @@ float4 main() : sv_target
}
[pixel shader fail(sm>=6)]
[pixel shader fail(sm>=6) todo]
float4 main() : sv_target
{
float4 f = 0;
@ -79,8 +79,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo probe (0, 0) rgba(1, 2, 3, 4)
todo draw quad
probe (0, 0) rgba(1, 2, 3, 4)
[pixel shader fail(sm>=4) todo]

View File

@ -32,7 +32,7 @@ todo(sm<6) draw quad
probe (0, 0) rgba(1, -2, -3, -4)
[pixel shader]
[pixel shader todo]
float4 main() : sv_target
{
float p[5] = {-1, -2, -3, -4, -5};
@ -43,8 +43,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo(sm<6) probe (0, 0) rgba(1, 2, -3, -5)
todo(sm<6) draw quad
probe (0, 0) rgba(1, 2, -3, -5)
[pixel shader]
@ -75,7 +75,7 @@ float4 main() : sv_target
% Matrix partial assignment.
[pixel shader fail(sm>=6)]
[pixel shader fail(sm>=6) todo]
float3x4 a;
float4 main() : sv_target
@ -91,12 +91,12 @@ uniform 0 float4 1 5 9 0
uniform 4 float4 2 6 10 0
uniform 8 float4 3 7 11 0
uniform 12 float4 4 8 12 0
draw quad
todo probe (0, 0) rgba(1050000, 2060000, 3070000, 0)
todo draw quad
probe (0, 0) rgba(1050000, 2060000, 3070000, 0)
% Multiple cast partial assignment.
[pixel shader fail(sm>=6)]
[pixel shader fail(sm>=6) todo]
float4 main() : sv_target
{
float4x4 g = 0;
@ -106,8 +106,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo probe (0, 0) rgba(1, 2, 5, 6)
todo draw quad
probe (0, 0) rgba(1, 2, 5, 6)
[pixel shader fail(sm>=4) todo]
@ -134,7 +134,7 @@ float4 main() : sv_target
}
[pixel shader]
[pixel shader todo]
float4 main() : sv_target
{
float p[4][3] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
@ -145,8 +145,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo(sm<6) probe (0, 0) rgba(10, 5, 40, 9)
todo(sm<6) draw quad
probe (0, 0) rgba(10, 5, 40, 9)
% For some reason this acts as .z swizzle on the rhs.