mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2024-11-21 16:46:41 -08:00
vkd3d-shader/hlsl: Lower non-constant array loads for SM1.
This is achieved by creating a variable initialized to zero, loading every array element, checking at runtime whether the non-constant index matches the index of that element, and, if it does, storing the corresponding element in the variable. This seems to be the same strategy that the native compiler uses.
This commit is contained in:
parent
e0cfd8f86a
commit
9f515a9daa
Notes:
Henri Verbeet
2024-08-08 23:47:10 +02:00
Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/936
@ -2633,6 +2633,123 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc
|
||||
return false;
|
||||
}
|
||||
|
||||
/* This pass flattens array loads that include the indexing of a non-constant index into multiple
|
||||
* constant loads, where the value of only one of them ends up in the resulting node.
|
||||
* This is achieved through a synthetic variable. The non-constant index is compared for equality
|
||||
* with every possible value it can have within the array bounds, and the ternary operator is used
|
||||
* to update the value of the synthetic var when the equality check passes. */
|
||||
static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
|
||||
struct hlsl_block *block)
|
||||
{
|
||||
struct hlsl_constant_value zero_value = {0};
|
||||
struct hlsl_ir_node *cut_index, *zero, *store;
|
||||
const struct hlsl_deref *deref;
|
||||
struct hlsl_type *cut_type;
|
||||
struct hlsl_ir_load *load;
|
||||
struct hlsl_ir_var *var;
|
||||
unsigned int i, i_cut;
|
||||
|
||||
if (instr->type != HLSL_IR_LOAD)
|
||||
return false;
|
||||
load = hlsl_ir_load(instr);
|
||||
deref = &load->src;
|
||||
|
||||
if (deref->path_len == 0)
|
||||
return false;
|
||||
|
||||
for (i = deref->path_len - 1; ; --i)
|
||||
{
|
||||
if (deref->path[i].node->type != HLSL_IR_CONSTANT)
|
||||
{
|
||||
i_cut = i;
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
cut_index = deref->path[i_cut].node;
|
||||
cut_type = deref->var->data_type;
|
||||
for (i = 0; i < i_cut; ++i)
|
||||
cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node);
|
||||
|
||||
if (cut_type->class != HLSL_CLASS_ARRAY)
|
||||
{
|
||||
VKD3D_ASSERT(hlsl_type_is_row_major(cut_type));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(var = hlsl_new_synthetic_var(ctx, "array_load", instr->data_type, &instr->loc)))
|
||||
return false;
|
||||
|
||||
if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, zero);
|
||||
|
||||
if (!(store = hlsl_new_simple_store(ctx, var, zero)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, store);
|
||||
|
||||
TRACE("Lowering non-constant array load on variable '%s'.\n", deref->var->name);
|
||||
for (i = 0; i < cut_type->e.array.elements_count; ++i)
|
||||
{
|
||||
struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL);
|
||||
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
|
||||
struct hlsl_ir_node *const_i, *equals, *ternary, *var_store;
|
||||
struct hlsl_ir_load *var_load, *specific_load;
|
||||
struct hlsl_deref deref_copy = {0};
|
||||
|
||||
if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, const_i);
|
||||
|
||||
operands[0] = cut_index;
|
||||
operands[1] = const_i;
|
||||
if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, equals);
|
||||
|
||||
if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, equals);
|
||||
|
||||
if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, &var_load->node);
|
||||
|
||||
if (!hlsl_copy_deref(ctx, &deref_copy, deref))
|
||||
return false;
|
||||
hlsl_src_remove(&deref_copy.path[i_cut]);
|
||||
hlsl_src_from_node(&deref_copy.path[i_cut], const_i);
|
||||
|
||||
if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc)))
|
||||
{
|
||||
hlsl_cleanup_deref(&deref_copy);
|
||||
return false;
|
||||
}
|
||||
hlsl_block_add_instr(block, &specific_load->node);
|
||||
|
||||
hlsl_cleanup_deref(&deref_copy);
|
||||
|
||||
operands[0] = equals;
|
||||
operands[1] = &specific_load->node;
|
||||
operands[2] = &var_load->node;
|
||||
if (!(ternary = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, ternary);
|
||||
|
||||
if (!(var_store = hlsl_new_simple_store(ctx, var, ternary)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, var_store);
|
||||
}
|
||||
|
||||
if (!(load = hlsl_new_var_load(ctx, var, &instr->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, &load->node);
|
||||
|
||||
return true;
|
||||
}
|
||||
/* Lower combined samples and sampler variables to synthesized separated textures and samplers.
|
||||
* That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */
|
||||
static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
|
||||
@ -6241,6 +6358,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
|
||||
|
||||
if (profile->major_version < 4)
|
||||
{
|
||||
while (lower_ir(ctx, lower_nonconstant_array_loads, body));
|
||||
|
||||
lower_ir(ctx, lower_ternary, body);
|
||||
|
||||
lower_ir(ctx, lower_nonfloat_exprs, body);
|
||||
|
@ -1,4 +1,4 @@
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
uniform float4 f[3];
|
||||
uniform float2 i;
|
||||
|
||||
@ -12,16 +12,16 @@ uniform 0 float4 1.0 2.0 3.0 4.0
|
||||
uniform 4 float4 5.0 6.0 7.0 8.0
|
||||
uniform 8 float4 9.0 10.0 11.0 12.0
|
||||
uniform 12 float4 0 0 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0)
|
||||
uniform 12 float4 1 0 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
|
||||
uniform 12 float4 0 1 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
|
||||
uniform 12 float4 1 1 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (9.0, 10.0, 11.0, 12.0)
|
||||
|
||||
|
||||
@ -49,7 +49,7 @@ todo(glsl) draw quad
|
||||
probe (0, 0) rgba (14.0, 14.0, 14.0, 14.0)
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
float i;
|
||||
|
||||
float4 main() : sv_target
|
||||
@ -61,7 +61,7 @@ float4 main() : sv_target
|
||||
|
||||
[test]
|
||||
uniform 0 float 2.3
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (3, 3, 3, 3)
|
||||
|
||||
|
||||
@ -90,7 +90,7 @@ todo(sm<4 | glsl) draw quad
|
||||
probe (0, 0) rgba (24.0, 0.0, 21.0, 1.0)
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
uniform float2 i;
|
||||
|
||||
float4 main() : sv_target
|
||||
@ -102,20 +102,20 @@ float4 main() : sv_target
|
||||
|
||||
[test]
|
||||
uniform 0 float4 0 0 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0)
|
||||
uniform 0 float4 1 0 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
|
||||
uniform 0 float4 0 1 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (5.0, 6.0, 7.0, 8.0)
|
||||
uniform 0 float4 1 1 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (9.0, 10.0, 11.0, 12.0)
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
float4 a;
|
||||
|
||||
float4 main() : sv_target
|
||||
@ -130,11 +130,11 @@ float4 main() : sv_target
|
||||
|
||||
[test]
|
||||
uniform 0 float4 0 0 2.4 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (1.0, 120.0, 90.0, 4.0)
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
float i, j;
|
||||
|
||||
float4 main() : sv_target
|
||||
@ -148,16 +148,16 @@ float4 main() : sv_target
|
||||
if(sm<4) uniform 0 float 3
|
||||
if(sm<4) uniform 4 float 1
|
||||
if(sm>=4) uniform 0 float4 3 1 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (7, 7, 7, 7)
|
||||
if(sm<4) uniform 0 float 5
|
||||
if(sm<4) uniform 4 float 0
|
||||
if(sm>=4) uniform 0 float4 5 0 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (10, 10, 10, 10)
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
float i, j;
|
||||
float k;
|
||||
|
||||
@ -186,17 +186,17 @@ if(sm<4) uniform 0 float 2
|
||||
if(sm<4) uniform 4 float 1
|
||||
if(sm<4) uniform 8 float -1
|
||||
if(sm>=4) uniform 0 float4 2 1 -1 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (30, 31, 32, 33)
|
||||
if(sm<4) uniform 0 float 1
|
||||
if(sm<4) uniform 4 float 0
|
||||
if(sm<4) uniform 8 float 1
|
||||
if(sm>=4) uniform 0 float4 1 0 1 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (70, 71, 72, 73)
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
float i, j;
|
||||
|
||||
float4 main() : sv_target
|
||||
@ -214,12 +214,12 @@ float4 main() : sv_target
|
||||
if(sm<4) uniform 0 float 11
|
||||
if(sm<4) uniform 4 float 12
|
||||
if(sm>=4) uniform 0 float4 11 12 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (5, 5, 5, 5)
|
||||
if(sm<4) uniform 0 float 13
|
||||
if(sm<4) uniform 4 float 10
|
||||
if(sm>=4) uniform 0 float4 13 10 0 0
|
||||
todo(sm<4 | glsl) draw quad
|
||||
todo(glsl) draw quad
|
||||
probe (0, 0) rgba (9, 9, 9, 9)
|
||||
|
||||
|
||||
@ -309,7 +309,7 @@ probe (0, 0) rgba (1, 5, 3, 4)
|
||||
% reset requirements
|
||||
|
||||
|
||||
[pixel shader todo(sm<4)]
|
||||
[pixel shader]
|
||||
uniform float4 f[4];
|
||||
uniform uint4 u;
|
||||
uniform uint4 v;
|
||||
@ -332,7 +332,7 @@ uniform 8 float 3.0
|
||||
uniform 12 float 4.0
|
||||
uniform 16 uint4 3 1 0 2
|
||||
uniform 20 uint4 0 3 1 2
|
||||
todo(sm<4 | glsl) draw quad
|
||||
if(sm<4) todo probe (0,0) rgba (1.0, 1.0, 1.0, 1.0)
|
||||
todo(glsl) draw quad
|
||||
if(sm<4) probe (0,0) rgba (1.0, 1.0, 1.0, 1.0)
|
||||
if(sm>=4 & sm<6) todo probe (0,0) rgba (4.0, 4.0, 4.0, 4.0)
|
||||
if(sm>=6) probe (0,0) rgba (4.0, 3.0, 2.0, 1.0)
|
||||
|
@ -375,7 +375,7 @@ draw quad
|
||||
probe (0, 0) rgba (6, 1, 0, 0)
|
||||
|
||||
|
||||
[pixel shader todo]
|
||||
[pixel shader]
|
||||
// Relative addressing extends the allocation size only up to the array's size.
|
||||
float idx;
|
||||
|
||||
@ -405,8 +405,8 @@ uniform 8 float 2
|
||||
uniform 12 float 3
|
||||
uniform 16 float 4
|
||||
uniform 20 float 3
|
||||
todo draw quad
|
||||
draw quad
|
||||
probe (0, 0) rgba (3, 3, 3, 3)
|
||||
uniform 20 float 1
|
||||
todo draw quad
|
||||
draw quad
|
||||
probe (0, 0) rgba (1, 1, 1, 1)
|
||||
|
Loading…
Reference in New Issue
Block a user