vkd3d-shader/hlsl: Delay lowering complex casts until after parse time.

While so far it has been possible to do this at parse time, this must
happen after knowing if the complex cast is on the lhs or not.

The modified tests show that before this patch we are currently
miscompiling when this happens, because a complex lhs cast is transformed
into a load, and add_assignment() just stores to the generated "cast"
temp.
This commit is contained in:
Francisco Casas 2025-01-16 17:08:24 -03:00 committed by Henri Verbeet
parent dc37d90190
commit c2e224c5fb
Notes: Henri Verbeet 2025-01-22 15:04:09 +01:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1350
5 changed files with 111 additions and 93 deletions

View File

@ -359,79 +359,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl
if (src_type->class == HLSL_CLASS_NULL)
return node;
if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR)
{
unsigned int src_comp_count = hlsl_type_component_count(src_type);
unsigned int dst_comp_count = hlsl_type_component_count(dst_type);
struct hlsl_deref var_deref;
bool broadcast, matrix_cast;
struct hlsl_ir_load *load;
struct hlsl_ir_var *var;
unsigned int dst_idx;
broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1;
matrix_cast = !broadcast && dst_comp_count != src_comp_count
&& src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX;
VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast);
if (matrix_cast)
{
VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx);
VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy);
}
if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc)))
return NULL;
hlsl_init_simple_deref_from_var(&var_deref, var);
for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx)
{
struct hlsl_ir_node *component_load;
struct hlsl_type *dst_comp_type;
struct hlsl_block store_block;
unsigned int src_idx;
if (broadcast)
{
src_idx = 0;
}
else if (matrix_cast)
{
unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx;
src_idx = y * src_type->e.numeric.dimx + x;
}
else
{
src_idx = dst_idx;
}
dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc)))
return NULL;
if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc)))
return NULL;
hlsl_block_add_instr(block, cast);
if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast))
return NULL;
hlsl_block_add_block(block, &store_block);
}
if (!(load = hlsl_new_var_load(ctx, var, loc)))
return NULL;
hlsl_block_add_instr(block, &load->node);
return &load->node;
}
else
{
if (!(cast = hlsl_new_cast(ctx, node, dst_type, loc)))
return NULL;
hlsl_block_add_instr(block, cast);
return cast;
}
}
static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *block,

View File

@ -1075,6 +1075,90 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h
return &coords_load->node;
}
/* IR pass callback: lower a cast involving a "complex" type (any class wider
 * than HLSL_CLASS_VECTOR, e.g. a matrix, array, or struct, on either side of
 * the cast) into per-component scalar casts. The cast expression is replaced
 * by component-wise loads from the source, scalar casts, and stores into a
 * synthetic "cast" variable, followed by a load of that variable as the
 * result. Returns true if the instruction was lowered, false if it was left
 * untouched. */
static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
    unsigned int src_comp_count, dst_comp_count;
    struct hlsl_type *src_type, *dst_type;
    struct hlsl_deref var_deref;
    bool broadcast, matrix_cast;
    struct hlsl_ir_load *load;
    struct hlsl_ir_node *arg;
    struct hlsl_ir_var *var;
    unsigned int dst_idx;

    /* Only HLSL_OP1_CAST expressions are candidates. */
    if (instr->type != HLSL_IR_EXPR)
        return false;
    if (hlsl_ir_expr(instr)->op != HLSL_OP1_CAST)
        return false;

    arg = hlsl_ir_expr(instr)->operands[0].node;
    dst_type = instr->data_type;
    src_type = arg->data_type;

    /* Scalar/vector to scalar/vector casts are not "complex"; leave those to
     * other passes. */
    if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR)
        return false;

    src_comp_count = hlsl_type_component_count(src_type);
    dst_comp_count = hlsl_type_component_count(dst_type);

    /* A broadcast replicates a single 1x1 numeric source component to every
     * destination component; a matrix cast truncates a matrix to a smaller
     * matrix type. */
    broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1;
    matrix_cast = !broadcast && dst_comp_count != src_comp_count
            && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX;
    VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast);
    if (matrix_cast)
    {
        VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx);
        VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy);
    }

    /* Synthesize a temporary variable of the destination type to assemble the
     * converted components into. */
    if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, &instr->loc)))
        return false;
    hlsl_init_simple_deref_from_var(&var_deref, var);

    for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx)
    {
        struct hlsl_ir_node *component_load, *cast;
        struct hlsl_type *dst_comp_type;
        struct hlsl_block store_block;
        unsigned int src_idx;

        if (broadcast)
        {
            src_idx = 0;
        }
        else if (matrix_cast)
        {
            /* Map the destination component index to the same (x, y) position
             * within the (larger) source matrix. */
            unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx;
            src_idx = y * src_type->e.numeric.dimx + x;
        }
        else
        {
            src_idx = dst_idx;
        }

        dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);

        /* Load the source component, cast it to the destination component
         * type, and store it into the synthetic variable. */
        if (!(component_load = hlsl_add_load_component(ctx, block, arg, src_idx, &arg->loc)))
            return false;
        if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, &arg->loc)))
            return false;
        hlsl_block_add_instr(block, cast);
        if (!hlsl_new_store_component(ctx, &store_block, &var_deref, dst_idx, cast))
            return false;
        hlsl_block_add_block(block, &store_block);
    }

    /* The load of the synthetic variable becomes the replacement value for
     * the original cast instruction. */
    if (!(load = hlsl_new_var_load(ctx, var, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, &load->node);
    return true;
}
/* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that
* represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of
* an assignment or as a value made from different components of the matrix. The former cases should
@ -6790,6 +6874,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
{
bool progress;
lower_ir(ctx, lower_complex_casts, body);
lower_ir(ctx, lower_matrix_swizzles, body);
lower_ir(ctx, lower_broadcasts, body);
@ -12084,6 +12169,7 @@ static void process_entry_function(struct hlsl_ctx *ctx,
while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
lower_ir(ctx, lower_complex_casts, body);
lower_ir(ctx, lower_matrix_swizzles, body);
lower_ir(ctx, lower_index_loads, body);

View File

@ -176,7 +176,7 @@ float4 main() : sv_target
}
[pixel shader fail(sm>=4) todo(sm>=4)]
[pixel shader fail(sm>=4) todo]
void fun(out float4 f)
{
f = float4(1.1, 2.3, 3.6, 4.3);
@ -191,5 +191,5 @@ float4 main() : sv_target
}
[test]
todo(sm>=4) draw quad
todo(sm<4) probe (0, 0) rgba(1.1, 2.3, 3.6, 4.3)
todo draw quad
probe (0, 0) rgba(1.1, 2.3, 3.6, 4.3)

View File

@ -30,7 +30,7 @@ probe (0, 0) rgba(-1, -2, 4, 0)
% Casts don't actually perform base type changes, only the outermost one, which
% I suspect is because of the implicit cast on the assignment and not the cast itself.
[pixel shader fail(sm>=4) todo(sm>=4)]
[pixel shader fail(sm>=4) todo]
float4 main() : sv_target
{
float4 f = 0;
@ -40,8 +40,8 @@ float4 main() : sv_target
}
[test]
todo(sm>=4) draw quad
todo probe (0, 0) rgba(1.3, -2.4, 3.3, 4.7)
todo draw quad
probe (0, 0) rgba(1.3, -2.4, 3.3, 4.7)
[pixel shader fail(sm>=4) todo]
@ -69,7 +69,7 @@ float4 main() : sv_target
}
[pixel shader fail(sm>=6)]
[pixel shader fail(sm>=6) todo]
float4 main() : sv_target
{
float4 f = 0;
@ -79,8 +79,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo probe (0, 0) rgba(1, 2, 3, 4)
todo draw quad
probe (0, 0) rgba(1, 2, 3, 4)
[pixel shader fail(sm>=4) todo]

View File

@ -32,7 +32,7 @@ todo(sm<6) draw quad
probe (0, 0) rgba(1, -2, -3, -4)
[pixel shader]
[pixel shader todo]
float4 main() : sv_target
{
float p[5] = {-1, -2, -3, -4, -5};
@ -43,8 +43,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo(sm<6) probe (0, 0) rgba(1, 2, -3, -5)
todo(sm<6) draw quad
probe (0, 0) rgba(1, 2, -3, -5)
[pixel shader]
@ -75,7 +75,7 @@ float4 main() : sv_target
% Matrix partial assignment.
[pixel shader fail(sm>=6)]
[pixel shader fail(sm>=6) todo]
float3x4 a;
float4 main() : sv_target
@ -91,12 +91,12 @@ uniform 0 float4 1 5 9 0
uniform 4 float4 2 6 10 0
uniform 8 float4 3 7 11 0
uniform 12 float4 4 8 12 0
draw quad
todo probe (0, 0) rgba(1050000, 2060000, 3070000, 0)
todo draw quad
probe (0, 0) rgba(1050000, 2060000, 3070000, 0)
% Multiple cast partial assignment.
[pixel shader fail(sm>=6)]
[pixel shader fail(sm>=6) todo]
float4 main() : sv_target
{
float4x4 g = 0;
@ -106,8 +106,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo probe (0, 0) rgba(1, 2, 5, 6)
todo draw quad
probe (0, 0) rgba(1, 2, 5, 6)
[pixel shader fail(sm>=4) todo]
@ -134,7 +134,7 @@ float4 main() : sv_target
}
[pixel shader]
[pixel shader todo]
float4 main() : sv_target
{
float p[4][3] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
@ -145,8 +145,8 @@ float4 main() : sv_target
}
[test]
draw quad
todo(sm<6) probe (0, 0) rgba(10, 5, 40, 9)
todo(sm<6) draw quad
probe (0, 0) rgba(10, 5, 40, 9)
% For some reason this acts as .z swizzle on the rhs.