From ebf7573571d4bfcd3f38846886106f204fd7f0e8 Mon Sep 17 00:00:00 2001 From: Francisco Casas Date: Mon, 8 May 2023 18:25:18 -0400 Subject: [PATCH] vkd3d-shader/hlsl: Support non-constant vector indexing. Non-constant vector indexing is not solved with relative addressing in the register indexes because this indexation cannot be at the level of register-components. Mathematical operations must be used instead. --- libs/vkd3d-shader/hlsl.c | 13 +++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 88 ++++++++++++++++++- tests/array-index-expr.shader_test | 36 ++++---- tests/expr-indexing.shader_test | 22 ++--- tests/hlsl-matrix-indexing.shader_test | 6 +- .../hlsl-vector-indexing-uniform.shader_test | 4 +- 7 files changed, 134 insertions(+), 37 deletions(-) diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 152ec627..f1edfa5e 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1287,6 +1287,19 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl return load; } +struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc) +{ + /* This deref can only exists temporarily because it is not the real owner of its members. */ + struct hlsl_deref tmp_deref; + + assert(deref->path_len >= 1); + + tmp_deref = *deref; + tmp_deref.path_len = deref->path_len - 1; + return hlsl_new_load_index(ctx, &tmp_deref, NULL, loc); +} + struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, const struct vkd3d_shader_location *loc) { diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 7d02448e..914c05ef 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1129,6 +1129,8 @@ struct hlsl_ir_load *hlsl_new_var_load(struct hlsl_ctx *ctx, struct hlsl_ir_var const struct vkd3d_shader_location *loc); struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 9774ca08..72ab27d3 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -1992,6 +1992,81 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i return true; } +static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *idx; + struct hlsl_deref *deref; + struct hlsl_type *type; + unsigned int i; + + if (instr->type != HLSL_IR_LOAD) + return false; + + deref = &hlsl_ir_load(instr)->src; + assert(deref->var); + + if (deref->path_len == 0) + return false; + + type = deref->var->data_type; + for (i = 0; i < deref->path_len - 1; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node); + + idx = deref->path[deref->path_len - 1].node; + + if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT) + { + struct hlsl_ir_node *eq, *swizzle, *dot, *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_load *vector_load; + struct hlsl_ir_constant *c; + enum hlsl_ir_expr_op op; + + if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc))) + return false; + list_add_before(&instr->entry, &vector_load->node.entry); + + if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), type->dimx, idx, &instr->loc))) + return false; + list_add_before(&instr->entry, &swizzle->entry); + + if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, type->dimx), &instr->loc))) + return false; + c->value.u[0].u = 0; + c->value.u[1].u = 1; + c->value.u[2].u = 2; + c->value.u[3].u = 3; + list_add_before(&instr->entry, &c->node.entry); + + operands[0] = swizzle; + operands[1] = &c->node; + if (!(eq = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, + hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, type->dimx), &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + if (!(eq = hlsl_new_cast(ctx, eq, type, &instr->loc))) + return false; + list_add_before(&instr->entry, &eq->entry); + + op = HLSL_OP2_DOT; + if (type->dimx == 1) + op = type->base_type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL; + + /* Note: We may be creating a DOT for bool vectors here, which we need to lower to + * LOGIC_OR + LOGIC_AND. */ + operands[0] = &vector_load->node; + operands[1] = eq; + if (!(dot = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) + return false; + list_add_before(&instr->entry, &dot->entry); + hlsl_replace_node(instr, dot); + + return true; + } + + return false; +} + /* Lower DIV to RCP + MUL. */ static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { @@ -2395,6 +2470,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void struct hlsl_type *type = instr->data_type; struct hlsl_ir_expr *expr; unsigned int i, dimx; + bool is_bool; if (instr->type != HLSL_IR_EXPR) return false; @@ -2403,14 +2479,16 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void if (expr->op != HLSL_OP2_DOT) return false; - if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT) + if (type->base_type == HLSL_TYPE_INT || type->base_type == HLSL_TYPE_UINT + || type->base_type == HLSL_TYPE_BOOL) { arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; assert(arg1->data_type->dimx == arg2->data_type->dimx); dimx = arg1->data_type->dimx; + is_bool = type->base_type == HLSL_TYPE_BOOL; - if (!(mult = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, arg1, arg2))) + if (!(mult = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2))) return false; list_add_before(&instr->entry, &mult->entry); @@ -2426,7 +2504,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void res = comps[0]; for (i = 1; i < dimx; ++i) { - if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, res, comps[i]))) + if (!(res = hlsl_new_binary_expr(ctx, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]))) return false; list_add_before(&instr->entry, &res->entry); } @@ -4000,6 +4078,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } while (progress); + hlsl_transform_ir(ctx, lower_nonconstant_vector_derefs, body, NULL); + hlsl_transform_ir(ctx, lower_casts_to_bool, body, NULL); + hlsl_transform_ir(ctx, lower_int_dot, body, NULL); + if (profile->major_version < 4) { hlsl_transform_ir(ctx, lower_division, body, NULL); diff --git a/tests/array-index-expr.shader_test b/tests/array-index-expr.shader_test index 0c607677..0a83080c 100644 --- a/tests/array-index-expr.shader_test +++ b/tests/array-index-expr.shader_test @@ -25,7 +25,7 @@ todo draw quad todo probe all rgba (9.0, 10.0, 11.0, 12.0) -[pixel shader todo] +[pixel shader] uniform float i; float4 main() : SV_TARGET @@ -36,20 +36,20 @@ float4 main() : SV_TARGET [test] uniform 0 float 0 -todo draw quad -todo probe all rgba (11.0, 11.0, 11.0, 11.0) +draw quad +probe all rgba (11.0, 11.0, 11.0, 11.0) uniform 0 float 1 -todo draw quad -todo probe all rgba (12.0, 12.0, 12.0, 12.0) +draw quad +probe all rgba (12.0, 12.0, 12.0, 12.0) uniform 0 float 2 -todo draw quad -todo probe all rgba (13.0, 13.0, 13.0, 13.0) +draw quad +probe all rgba (13.0, 13.0, 13.0, 13.0) uniform 0 float 3 -todo draw quad -todo probe all rgba (14.0, 14.0, 14.0, 14.0) +draw quad +probe all rgba (14.0, 14.0, 14.0, 14.0) -[pixel shader todo] +[pixel shader] uniform float i; float4 main() : SV_TARGET @@ -61,17 +61,17 @@ float4 main() : SV_TARGET [test] uniform 0 float 0 -todo draw quad -todo probe all rgba (21.0, 1.0, 24.0, 0.0) +draw quad +probe all rgba (21.0, 1.0, 24.0, 0.0) uniform 0 float 1 -todo draw quad -todo probe all rgba (22.0, 0.0, 23.0, 1.0) +draw quad +probe all rgba (22.0, 0.0, 23.0, 1.0) uniform 0 float 2 -todo draw quad -todo probe all rgba (23.0, 1.0, 22.0, 0.0) +draw quad +probe all rgba (23.0, 1.0, 22.0, 0.0) uniform 0 float 3 -todo draw quad -todo probe all rgba (24.0, 0.0, 21.0, 1.0) +draw quad +probe all rgba (24.0, 0.0, 21.0, 1.0) [pixel shader todo] diff --git a/tests/expr-indexing.shader_test b/tests/expr-indexing.shader_test index 83a63d67..3dcc5727 100644 --- a/tests/expr-indexing.shader_test +++ b/tests/expr-indexing.shader_test @@ -13,7 +13,7 @@ draw quad probe all rgba (8.0, 8.0, 8.0, 8.0) -[pixel shader todo] +[pixel shader] float4 a, b; float i; @@ -26,8 +26,8 @@ float4 main() : sv_target uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float 2 -todo draw quad -todo probe all rgba (10.0, 10.0, 10.0, 10.0) +draw quad +probe all rgba (10.0, 10.0, 10.0, 10.0) [pixel shader] @@ -44,7 +44,7 @@ draw quad probe all rgba (3.0, 3.0, 3.0, 3.0) -[pixel shader todo] +[pixel shader] float4 a; float i; @@ -56,11 +56,11 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 0 -todo draw quad -todo probe all rgba (4.0, 4.0, 4.0, 4.0) +draw quad +probe all rgba (4.0, 4.0, 4.0, 4.0) uniform 4 float 2 -todo draw quad -todo probe all rgba (1.0, 1.0, 1.0, 1.0) +draw quad +probe all rgba (1.0, 1.0, 1.0, 1.0) [pixel shader] @@ -82,7 +82,7 @@ draw quad probe all rgba (4.0, 4.0, 4.0, 4.0) -[pixel shader todo] +[pixel shader] float4 a; float i; @@ -99,5 +99,5 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 1 -todo draw quad -todo probe all rgba (2.0, 2.0, 2.0, 2.0) +draw quad +probe all rgba (2.0, 2.0, 2.0, 2.0) diff --git a/tests/hlsl-matrix-indexing.shader_test b/tests/hlsl-matrix-indexing.shader_test index c3d08296..a57d8fb8 100644 --- a/tests/hlsl-matrix-indexing.shader_test +++ b/tests/hlsl-matrix-indexing.shader_test @@ -108,7 +108,7 @@ draw quad probe all rgba (3.0, 4.0, 50.0, 60.0) -[pixel shader todo] +[pixel shader] uniform float i; float4 main() : sv_target @@ -120,8 +120,8 @@ float4 main() : sv_target [test] uniform 0 float 2 -todo draw quad -todo probe all rgba (8, 9, 10, 11) +draw quad +probe all rgba (8, 9, 10, 11) [pixel shader todo] diff --git a/tests/hlsl-vector-indexing-uniform.shader_test b/tests/hlsl-vector-indexing-uniform.shader_test index 968f570b..e5ffbdd0 100644 --- a/tests/hlsl-vector-indexing-uniform.shader_test +++ b/tests/hlsl-vector-indexing-uniform.shader_test @@ -1,6 +1,6 @@ % Use a uniform to prevent the compiler from optimizing. -[pixel shader todo] +[pixel shader] uniform float i; float4 main() : SV_TARGET { @@ -12,5 +12,5 @@ float4 main() : SV_TARGET [test] uniform 0 float 2 -todo draw quad +draw quad probe all rgba (0.5, 0.3, 0.8, 0.2)