From 5c986b9cde97e0def4756f8c6692f9a7bbe22a91 Mon Sep 17 00:00:00 2001 From: Francisco Casas Date: Wed, 1 Nov 2023 17:07:46 -0300 Subject: [PATCH] vkd3d-shader/hlsl: Lower SLT instructions for pixel shaders. Properly passing the inverse-trig.shader_test tests whose qualifiers have been removed requires making spirv.c capable of handling ABS. The same happens for the ps_3_0 equality test in float-comparison.shader_test. --- libs/vkd3d-shader/hlsl.c | 10 +++ libs/vkd3d-shader/hlsl.h | 2 + libs/vkd3d-shader/hlsl_codegen.c | 63 +++++++++++++++++++ tests/hlsl/any.shader_test | 24 +++---- tests/hlsl/expr-indexing.shader_test | 14 ++--- tests/hlsl/float-comparison.shader_test | 10 +-- tests/hlsl/inverse-trig.shader_test | 4 +- tests/hlsl/matrix-indexing.shader_test | 6 +- tests/hlsl/non-const-indexing.shader_test | 10 +-- tests/hlsl/step.shader_test | 4 +- .../hlsl/vector-indexing-uniform.shader_test | 4 +- 11 files changed, 113 insertions(+), 38 deletions(-) diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 2820b9ab..a82334e5 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; + + assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); +} + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index 9aae913a..fd6c4f16 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1248,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 09c21f16..19ed4042 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3141,6 +3141,67 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node return true; } +/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to + * CMP instructions (only available in pixel shaders). + * Based on the following equivalence: + * SLT(x, y) + * = (x < y) ? 1.0 : 0.0 + * = ((x - y) >= 0) ? 0.0 : 1.0 + * = CMP(x - y, 0.0, 1.0) + */ +static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_SLT) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) + return false; + hlsl_block_add_instr(block, cmp); + + return true; +} + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -5331,6 +5392,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); lower_ir(ctx, lower_comparison_operators, body); + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + lower_ir(ctx, lower_slt, body); } if (profile->major_version < 2) diff --git a/tests/hlsl/any.shader_test b/tests/hlsl/any.shader_test index 83a1dad9..45df3bdc 100644 --- a/tests/hlsl/any.shader_test +++ b/tests/hlsl/any.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 f; float4 main() : sv_target @@ -8,28 +8,28 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 1.0 1.0 1.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 1.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 1.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 0.0 1.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) uniform 0 float4 -1.0 -1.0 -1.0 -1.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) -[pixel shader todo(sm<4)] +[pixel shader] uniform float f; float4 main() : sv_target @@ -39,13 +39,13 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) uniform 0 float4 0.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) uniform 0 float4 -1.0 0.0 0.0 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) diff --git a/tests/hlsl/expr-indexing.shader_test b/tests/hlsl/expr-indexing.shader_test index 2aa99b40..3dcc5727 100644 --- a/tests/hlsl/expr-indexing.shader_test +++ b/tests/hlsl/expr-indexing.shader_test @@ -13,7 +13,7 @@ draw quad probe all rgba (8.0, 8.0, 8.0, 8.0) -[pixel shader todo(sm<4)] +[pixel shader] float4 a, b; float i; @@ -26,7 +26,7 @@ float4 main() : sv_target uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float4 5.0 6.0 7.0 8.0 uniform 8 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (10.0, 10.0, 10.0, 10.0) @@ -44,7 +44,7 @@ draw quad probe all rgba (3.0, 3.0, 3.0, 3.0) -[pixel shader todo(sm<4)] +[pixel shader] float4 a; float i; @@ -56,10 +56,10 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 0 -todo(sm<4) draw quad +draw quad probe all rgba (4.0, 4.0, 4.0, 4.0) uniform 4 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 1.0) @@ -82,7 +82,7 @@ draw quad probe all rgba (4.0, 4.0, 4.0, 4.0) -[pixel shader todo(sm<4)] +[pixel shader] float4 a; float i; @@ -99,5 +99,5 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 uniform 4 float 1 -todo(sm<4) draw quad +draw quad probe all rgba (2.0, 2.0, 2.0, 2.0) diff --git a/tests/hlsl/float-comparison.shader_test b/tests/hlsl/float-comparison.shader_test index 6062643d..9a5ec93c 100644 --- a/tests/hlsl/float-comparison.shader_test +++ b/tests/hlsl/float-comparison.shader_test @@ -1,5 +1,5 @@ % Check that -0.0f is not less than 0.0f -[pixel shader todo(sm<4)] +[pixel shader] float a; float4 main() : sv_target @@ -9,7 +9,7 @@ float4 main() : sv_target [test] uniform 0 float 0.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) @@ -77,7 +77,7 @@ if(sm>=6) probe all rgba (1010101.0, 11110000.0, 1101001.0, 1.0) shader model >= 2.0 shader model < 3.0 -[pixel shader todo(sm<4)] +[pixel shader] float4 a, b; float4 main() : sv_target @@ -88,7 +88,7 @@ float4 main() : sv_target [test] uniform 0 float4 1e-37 1e-37 1e+38 1e+38 uniform 4 float4 0 -1e-37 1e+38 -1e+38 -todo(sm<4) draw quad +draw quad probe all rgba (1.0, 1.0, 1.0, 0.0) @@ -96,7 +96,7 @@ probe all rgba (1.0, 1.0, 1.0, 0.0) shader model >= 3.0 shader model < 4.0 -[pixel shader todo(sm<4)] +[pixel shader] float4 a, b; float4 main() : sv_target diff --git a/tests/hlsl/inverse-trig.shader_test b/tests/hlsl/inverse-trig.shader_test index e428d309..27a5025c 100644 --- a/tests/hlsl/inverse-trig.shader_test +++ b/tests/hlsl/inverse-trig.shader_test @@ -3,7 +3,7 @@ % implementations. DXIL defines intrinsics for inverse trig, to be implemented % by the backend. -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 a; float4 main() : sv_target @@ -32,7 +32,7 @@ uniform 0 float4 1.0 0.0 0.0 0.0 todo(sm<4) draw quad probe all rgba (0.0, 0.0, 0.0, 0.0) 128 -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 a; float4 main() : sv_target diff --git a/tests/hlsl/matrix-indexing.shader_test b/tests/hlsl/matrix-indexing.shader_test index 6e2f01b7..f1726763 100644 --- a/tests/hlsl/matrix-indexing.shader_test +++ b/tests/hlsl/matrix-indexing.shader_test @@ -108,7 +108,7 @@ draw quad probe all rgba (3.0, 4.0, 50.0, 60.0) -[pixel shader todo(sm<4)] +[pixel shader] uniform float i; float4 main() : sv_target @@ -120,8 +120,8 @@ float4 main() : sv_target [test] uniform 0 float 2 -todo(sm<4) draw quad -probe all rgba (8, 9, 10, 11) +draw quad +todo(sm<4) probe all rgba (8, 9, 10, 11) [pixel shader todo(sm<4)] diff --git a/tests/hlsl/non-const-indexing.shader_test b/tests/hlsl/non-const-indexing.shader_test index aed33092..d1146334 100644 --- a/tests/hlsl/non-const-indexing.shader_test +++ b/tests/hlsl/non-const-indexing.shader_test @@ -25,7 +25,7 @@ todo(sm<4) draw quad probe all rgba (9.0, 10.0, 11.0, 12.0) -[pixel shader todo(sm<4)] +[pixel shader] uniform float i; float4 main() : SV_TARGET @@ -36,16 +36,16 @@ float4 main() : SV_TARGET [test] uniform 0 float 0 -todo(sm<4) draw quad +draw quad probe all rgba (11.0, 11.0, 11.0, 11.0) uniform 0 float 1 -todo(sm<4) draw quad +draw quad probe all rgba (12.0, 12.0, 12.0, 12.0) uniform 0 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (13.0, 13.0, 13.0, 13.0) uniform 0 float 3 -todo(sm<4) draw quad +draw quad probe all rgba (14.0, 14.0, 14.0, 14.0) diff --git a/tests/hlsl/step.shader_test b/tests/hlsl/step.shader_test index b965f33e..e201e15f 100644 --- a/tests/hlsl/step.shader_test +++ b/tests/hlsl/step.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm<4)] +[pixel shader] uniform float4 f, p; float4 main() : sv_target @@ -9,7 +9,7 @@ float4 main() : sv_target [test] uniform 0 float4 5.0 -2.6 3.0 2.0 uniform 4 float4 1.0 -4.3 3.0 4.0 -todo(sm<4) draw quad +draw quad probe all rgba (0.0, 0.0, 1.0, 1.0) diff --git a/tests/hlsl/vector-indexing-uniform.shader_test b/tests/hlsl/vector-indexing-uniform.shader_test index cd77462e..e5ffbdd0 100644 --- a/tests/hlsl/vector-indexing-uniform.shader_test +++ b/tests/hlsl/vector-indexing-uniform.shader_test @@ -1,6 +1,6 @@ % Use a uniform to prevent the compiler from optimizing. -[pixel shader todo(sm<4)] +[pixel shader] uniform float i; float4 main() : SV_TARGET { @@ -12,5 +12,5 @@ float4 main() : SV_TARGET [test] uniform 0 float 2 -todo(sm<4) draw quad +draw quad probe all rgba (0.5, 0.3, 0.8, 0.2)