vkd3d-shader/hlsl: Lower non-constant array loads for SM1.

This is achieved by creating a variable initialized to zero, loading
every array element, comparing at runtime whether the non-constant
index matches the index of that element, and, if it does, storing the
corresponding element in the variable.

This seems to be the same strategy that the native compiler uses.
This commit is contained in:
Francisco Casas 2024-07-08 15:13:07 -04:00 committed by Henri Verbeet
parent e0cfd8f86a
commit 9f515a9daa
Notes: Henri Verbeet 2024-08-08 23:47:10 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/936
3 changed files with 148 additions and 29 deletions

View File

@ -2633,6 +2633,123 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc
return false;
}
/* This pass flattens array loads that include the indexing of a non-constant index into multiple
* constant loads, where the value of only one of them ends up in the resulting node.
* This is achieved through a synthetic variable. The non-constant index is compared for equality
* with every possible value it can have within the array bounds, and the ternary operator is used
* to update the value of the synthetic var when the equality check passes. */
static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
struct hlsl_block *block)
{
struct hlsl_constant_value zero_value = {0};
struct hlsl_ir_node *cut_index, *zero, *store;
const struct hlsl_deref *deref;
struct hlsl_type *cut_type;
struct hlsl_ir_load *load;
struct hlsl_ir_var *var;
unsigned int i, i_cut;
if (instr->type != HLSL_IR_LOAD)
return false;
load = hlsl_ir_load(instr);
deref = &load->src;
if (deref->path_len == 0)
return false;
for (i = deref->path_len - 1; ; --i)
{
if (deref->path[i].node->type != HLSL_IR_CONSTANT)
{
i_cut = i;
break;
}
if (i == 0)
return false;
}
cut_index = deref->path[i_cut].node;
cut_type = deref->var->data_type;
for (i = 0; i < i_cut; ++i)
cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node);
if (cut_type->class != HLSL_CLASS_ARRAY)
{
VKD3D_ASSERT(hlsl_type_is_row_major(cut_type));
return false;
}
if (!(var = hlsl_new_synthetic_var(ctx, "array_load", instr->data_type, &instr->loc)))
return false;
if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc)))
return false;
hlsl_block_add_instr(block, zero);
if (!(store = hlsl_new_simple_store(ctx, var, zero)))
return false;
hlsl_block_add_instr(block, store);
TRACE("Lowering non-constant array load on variable '%s'.\n", deref->var->name);
for (i = 0; i < cut_type->e.array.elements_count; ++i)
{
struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL);
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
struct hlsl_ir_node *const_i, *equals, *ternary, *var_store;
struct hlsl_ir_load *var_load, *specific_load;
struct hlsl_deref deref_copy = {0};
if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc)))
return false;
hlsl_block_add_instr(block, const_i);
operands[0] = cut_index;
operands[1] = const_i;
if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc)))
return false;
hlsl_block_add_instr(block, equals);
if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc)))
return false;
hlsl_block_add_instr(block, equals);
if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc)))
return false;
hlsl_block_add_instr(block, &var_load->node);
if (!hlsl_copy_deref(ctx, &deref_copy, deref))
return false;
hlsl_src_remove(&deref_copy.path[i_cut]);
hlsl_src_from_node(&deref_copy.path[i_cut], const_i);
if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc)))
{
hlsl_cleanup_deref(&deref_copy);
return false;
}
hlsl_block_add_instr(block, &specific_load->node);
hlsl_cleanup_deref(&deref_copy);
operands[0] = equals;
operands[1] = &specific_load->node;
operands[2] = &var_load->node;
if (!(ternary = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc)))
return false;
hlsl_block_add_instr(block, ternary);
if (!(var_store = hlsl_new_simple_store(ctx, var, ternary)))
return false;
hlsl_block_add_instr(block, var_store);
}
if (!(load = hlsl_new_var_load(ctx, var, &instr->loc)))
return false;
hlsl_block_add_instr(block, &load->node);
return true;
}
/* Lower combined samples and sampler variables to synthesized separated textures and samplers.
* That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */
static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
@ -6241,6 +6358,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
if (profile->major_version < 4)
{
while (lower_ir(ctx, lower_nonconstant_array_loads, body));
lower_ir(ctx, lower_ternary, body);
lower_ir(ctx, lower_nonfloat_exprs, body);

View File

@ -1,4 +1,4 @@
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 f[3];
uniform float2 i;
@ -12,16 +12,16 @@ uniform 0 float4 1.0 2.0 3.0 4.0
uniform 4 float4 5.0 6.0 7.0 8.0
uniform 8 float4 9.0 10.0 11.0 12.0
uniform 12 float4 0 0 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0)
uniform 12 float4 1 0 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
uniform 12 float4 0 1 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
uniform 12 float4 1 1 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (9.0, 10.0, 11.0, 12.0)
@ -49,7 +49,7 @@ todo(glsl) draw quad
probe (0, 0) rgba (14.0, 14.0, 14.0, 14.0)
[pixel shader todo(sm<4)]
[pixel shader]
float i;
float4 main() : sv_target
@ -61,7 +61,7 @@ float4 main() : sv_target
[test]
uniform 0 float 2.3
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (3, 3, 3, 3)
@ -90,7 +90,7 @@ todo(sm<4 | glsl) draw quad
probe (0, 0) rgba (24.0, 0.0, 21.0, 1.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float2 i;
float4 main() : sv_target
@ -102,20 +102,20 @@ float4 main() : sv_target
[test]
uniform 0 float4 0 0 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0)
uniform 0 float4 1 0 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
uniform 0 float4 0 1 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
uniform 0 float4 1 1 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (9.0, 10.0, 11.0, 12.0)
[pixel shader todo(sm<4)]
[pixel shader]
float4 a;
float4 main() : sv_target
@ -130,11 +130,11 @@ float4 main() : sv_target
[test]
uniform 0 float4 0 0 2.4 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (1.0, 120.0, 90.0, 4.0)
[pixel shader todo(sm<4)]
[pixel shader]
float i, j;
float4 main() : sv_target
@ -148,16 +148,16 @@ float4 main() : sv_target
if(sm<4) uniform 0 float 3
if(sm<4) uniform 4 float 1
if(sm>=4) uniform 0 float4 3 1 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (7, 7, 7, 7)
if(sm<4) uniform 0 float 5
if(sm<4) uniform 4 float 0
if(sm>=4) uniform 0 float4 5 0 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (10, 10, 10, 10)
[pixel shader todo(sm<4)]
[pixel shader]
float i, j;
float k;
@ -186,17 +186,17 @@ if(sm<4) uniform 0 float 2
if(sm<4) uniform 4 float 1
if(sm<4) uniform 8 float -1
if(sm>=4) uniform 0 float4 2 1 -1 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (30, 31, 32, 33)
if(sm<4) uniform 0 float 1
if(sm<4) uniform 4 float 0
if(sm<4) uniform 8 float 1
if(sm>=4) uniform 0 float4 1 0 1 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (70, 71, 72, 73)
[pixel shader todo(sm<4)]
[pixel shader]
float i, j;
float4 main() : sv_target
@ -214,12 +214,12 @@ float4 main() : sv_target
if(sm<4) uniform 0 float 11
if(sm<4) uniform 4 float 12
if(sm>=4) uniform 0 float4 11 12 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (5, 5, 5, 5)
if(sm<4) uniform 0 float 13
if(sm<4) uniform 4 float 10
if(sm>=4) uniform 0 float4 13 10 0 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe (0, 0) rgba (9, 9, 9, 9)
@ -309,7 +309,7 @@ probe (0, 0) rgba (1, 5, 3, 4)
% reset requirements
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 f[4];
uniform uint4 u;
uniform uint4 v;
@ -332,7 +332,7 @@ uniform 8 float 3.0
uniform 12 float 4.0
uniform 16 uint4 3 1 0 2
uniform 20 uint4 0 3 1 2
todo(sm<4 | glsl) draw quad
if(sm<4) todo probe (0,0) rgba (1.0, 1.0, 1.0, 1.0)
todo(glsl) draw quad
if(sm<4) probe (0,0) rgba (1.0, 1.0, 1.0, 1.0)
if(sm>=4 & sm<6) todo probe (0,0) rgba (4.0, 4.0, 4.0, 4.0)
if(sm>=6) probe (0,0) rgba (4.0, 3.0, 2.0, 1.0)

View File

@ -375,7 +375,7 @@ draw quad
probe (0, 0) rgba (6, 1, 0, 0)
[pixel shader todo]
[pixel shader]
// Relative addressing extends the allocation size only up to the array's size.
float idx;
@ -405,8 +405,8 @@ uniform 8 float 2
uniform 12 float 3
uniform 16 float 4
uniform 20 float 3
todo draw quad
draw quad
probe (0, 0) rgba (3, 3, 3, 3)
uniform 20 float 1
todo draw quad
draw quad
probe (0, 0) rgba (1, 1, 1, 1)