diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index f081fe94..3527656a 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -2633,6 +2633,123 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc
     return false;
 }
 
+/* This pass flattens array loads that are indexed with a non-constant index into multiple
+ * constant loads, where the value of only one of them ends up in the resulting node.
+ * This is achieved through a synthetic variable. The non-constant index is compared for equality
+ * with every possible value it can have within the array bounds, and the ternary operator is used
+ * to update the value of the synthetic var when the equality check passes. */
+static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+        struct hlsl_block *block)
+{
+    struct hlsl_constant_value zero_value = {0};
+    struct hlsl_ir_node *cut_index, *zero, *store;
+    const struct hlsl_deref *deref;
+    struct hlsl_type *cut_type;
+    struct hlsl_ir_load *load;
+    struct hlsl_ir_var *var;
+    unsigned int i, i_cut;
+
+    if (instr->type != HLSL_IR_LOAD)
+        return false;
+    load = hlsl_ir_load(instr);
+    deref = &load->src;
+
+    if (deref->path_len == 0)
+        return false;
+
+    for (i = deref->path_len - 1; ; --i)
+    {
+        if (deref->path[i].node->type != HLSL_IR_CONSTANT)
+        {
+            i_cut = i;
+            break;
+        }
+
+        if (i == 0)
+            return false;
+    }
+
+    cut_index = deref->path[i_cut].node;
+    cut_type = deref->var->data_type;
+    for (i = 0; i < i_cut; ++i)
+        cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node);
+
+    if (cut_type->class != HLSL_CLASS_ARRAY)
+    {
+        VKD3D_ASSERT(hlsl_type_is_row_major(cut_type));
+        return false;
+    }
+
+    if (!(var = hlsl_new_synthetic_var(ctx, "array_load", instr->data_type, &instr->loc)))
+        return false;
+
+    if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, zero);
+
+    if (!(store = hlsl_new_simple_store(ctx, var, zero)))
+        return false;
+    hlsl_block_add_instr(block, store);
+
+    TRACE("Lowering non-constant array load on variable '%s'.\n", deref->var->name);
+    for (i = 0; i < cut_type->e.array.elements_count; ++i)
+    {
+        struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL);
+        struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
+        struct hlsl_ir_node *const_i, *equals, *ternary, *var_store;
+        struct hlsl_ir_load *var_load, *specific_load;
+        struct hlsl_deref deref_copy = {0};
+
+        if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc)))
+            return false;
+        hlsl_block_add_instr(block, const_i);
+
+        operands[0] = cut_index;
+        operands[1] = const_i;
+        if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc)))
+            return false;
+        hlsl_block_add_instr(block, equals);
+
+        if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc)))
+            return false;
+        hlsl_block_add_instr(block, equals);
+
+        if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc)))
+            return false;
+        hlsl_block_add_instr(block, &var_load->node);
+
+        if (!hlsl_copy_deref(ctx, &deref_copy, deref))
+            return false;
+        hlsl_src_remove(&deref_copy.path[i_cut]);
+        hlsl_src_from_node(&deref_copy.path[i_cut], const_i);
+
+        if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc)))
+        {
+            hlsl_cleanup_deref(&deref_copy);
+            return false;
+        }
+        hlsl_block_add_instr(block, &specific_load->node);
+
+        hlsl_cleanup_deref(&deref_copy);
+
+        operands[0] = equals;
+        operands[1] = &specific_load->node;
+        operands[2] = &var_load->node;
+        if (!(ternary = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc)))
+            return false;
+        hlsl_block_add_instr(block, ternary);
+
+        if (!(var_store = hlsl_new_simple_store(ctx, var, ternary)))
+            return false;
+        hlsl_block_add_instr(block, var_store);
+    }
+
+    if (!(load = hlsl_new_var_load(ctx, var, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, &load->node);
+
+    return true;
+}
 /* Lower combined samples and sampler variables to synthesized separated textures and samplers.
  * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */
 static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
@@ -6241,6 +6358,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
 
     if (profile->major_version < 4)
     {
+        while (lower_ir(ctx, lower_nonconstant_array_loads, body));
+
         lower_ir(ctx, lower_ternary, body);
 
         lower_ir(ctx, lower_nonfloat_exprs, body);
diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test
index f8901ffe..55ca15b6 100644
--- a/tests/hlsl/non-const-indexing.shader_test
+++ b/tests/hlsl/non-const-indexing.shader_test
@@ -1,4 +1,4 @@
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 f[3];
 uniform float2 i;
 
@@ -12,16 +12,16 @@ uniform 0 float4 1.0 2.0 3.0 4.0
 uniform 4 float4 5.0 6.0 7.0 8.0
 uniform 8 float4 9.0 10.0 11.0 12.0
 uniform 12 float4 0 0 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0)
 uniform 12 float4 1 0 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
 uniform 12 float4 0 1 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
 uniform 12 float4 1 1 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (9.0, 10.0, 11.0, 12.0)
 
 
@@ -49,7 +49,7 @@ todo(glsl) draw quad
 probe (0, 0) rgba (14.0, 14.0, 14.0, 14.0)
 
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 float i;
 
 float4 main() : sv_target
@@ -61,7 +61,7 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float 2.3
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (3, 3, 3, 3)
 
 
@@ -90,7 +90,7 @@ todo(sm<4 | glsl) draw quad
 probe (0, 0) rgba (24.0, 0.0, 21.0, 1.0)
 
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float2 i;
 
 float4 main() : sv_target
@@ -102,20 +102,20 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float4 0 0 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0)
 uniform 0 float4 1 0 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
 uniform 0 float4 0 1 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
 uniform 0 float4 1 1 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (9.0, 10.0, 11.0, 12.0)
 
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 float4 a;
 
 float4 main() : sv_target
@@ -130,11 +130,11 @@ float4 main() : sv_target
 
 [test]
 uniform 0 float4 0 0 2.4 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (1.0, 120.0, 90.0, 4.0)
 
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 float i, j;
 
 float4 main() : sv_target
@@ -148,16 +148,16 @@ float4 main() : sv_target
 if(sm<4) uniform 0 float 3
 if(sm<4) uniform 4 float 1
 if(sm>=4) uniform 0 float4 3 1 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (7, 7, 7, 7)
 if(sm<4) uniform 0 float 5
 if(sm<4) uniform 4 float 0
 if(sm>=4) uniform 0 float4 5 0 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (10, 10, 10, 10)
 
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 float i, j;
 float k;
 
@@ -186,17 +186,17 @@ if(sm<4) uniform 0 float 2
 if(sm<4) uniform 4 float 1
 if(sm<4) uniform 8 float -1
 if(sm>=4) uniform 0 float4 2 1 -1 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (30, 31, 32, 33)
 if(sm<4) uniform 0 float 1
 if(sm<4) uniform 4 float 0
 if(sm<4) uniform 8 float 1
 if(sm>=4) uniform 0 float4 1 0 1 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (70, 71, 72, 73)
 
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 float i, j;
 
 float4 main() : sv_target
@@ -214,12 +214,12 @@ float4 main() : sv_target
 if(sm<4) uniform 0 float 11
 if(sm<4) uniform 4 float 12
 if(sm>=4) uniform 0 float4 11 12 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (5, 5, 5, 5)
 if(sm<4) uniform 0 float 13
 if(sm<4) uniform 4 float 10
 if(sm>=4) uniform 0 float4 13 10 0 0
-todo(sm<4 | glsl) draw quad
+todo(glsl) draw quad
 probe (0, 0) rgba (9, 9, 9, 9)
 
 
@@ -309,7 +309,7 @@ probe (0, 0) rgba (1, 5, 3, 4)
 
 % reset requirements
 
-[pixel shader todo(sm<4)]
+[pixel shader]
 uniform float4 f[4];
 uniform uint4 u;
 uniform uint4 v;
@@ -332,7 +332,7 @@ uniform 8 float 3.0
 uniform 12 float 4.0
 uniform 16 uint4 3 1 0 2
 uniform 20 uint4 0 3 1 2
-todo(sm<4 | glsl) draw quad
-if(sm<4) todo probe (0,0) rgba (1.0, 1.0, 1.0, 1.0)
+todo(glsl) draw quad
+if(sm<4) probe (0,0) rgba (1.0, 1.0, 1.0, 1.0)
 if(sm>=4 & sm<6) todo probe (0,0) rgba (4.0, 4.0, 4.0, 4.0)
 if(sm>=6) probe (0,0) rgba (4.0, 3.0, 2.0, 1.0)
diff --git a/tests/hlsl/sm1-const-allocation.shader_test b/tests/hlsl/sm1-const-allocation.shader_test
index 86f9e5f9..4cc3eae8 100644
--- a/tests/hlsl/sm1-const-allocation.shader_test
+++ b/tests/hlsl/sm1-const-allocation.shader_test
@@ -375,7 +375,7 @@ draw quad
 probe (0, 0) rgba (6, 1, 0, 0)
 
 
-[pixel shader todo]
+[pixel shader]
 // Relative addressing extends the allocation size only up to the array's size.
 float idx;
 
@@ -405,8 +405,8 @@ uniform 8 float 2
 uniform 12 float 3
 uniform 16 float 4
 uniform 20 float 3
-todo draw quad
+draw quad
 probe (0, 0) rgba (3, 3, 3, 3)
 uniform 20 float 1
-todo draw quad
+draw quad
 probe (0, 0) rgba (1, 1, 1, 1)
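
Illustration (editor's sketch, not part of the patch): for a load such as "float4 r = f[i]"
with "float4 f[3]", and assuming "idx" already holds the integer value of the index, the
lowering implemented by lower_nonconstant_array_loads is roughly equivalent to writing,
in HLSL:

    float4 r = 0;            // synthetic var initialised to zero
    r = (idx == 0) ? f[0] : r;
    r = (idx == 1) ? f[1] : r;
    r = (idx == 2) ? f[2] : r;

Only constant indices remain after the pass, which is what allows the non-const-indexing
tests above to stop being marked todo for SM1 profiles.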