vkd3d-shader/hlsl: Lower SLT instructions for pixel shaders.

Fully passing the inverse-trig.shader_test tests whose shader-level todo
qualifiers have been removed also requires making spirv.c capable of
handling ABS. The same applies to the ps_3_0 equality test in
float-comparison.shader_test.
This commit is contained in:
Francisco Casas 2023-11-01 17:07:46 -03:00 committed by Alexandre Julliard
parent 4133c4d801
commit 5c986b9cde
Notes: Alexandre Julliard 2024-03-14 23:24:02 +01:00
Approved-by: Zebediah Figura (@zfigura)
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/706
11 changed files with 113 additions and 38 deletions

View File

@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp
return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
}
/* Build an expression node for the three-operand operation "op".
 * The three operands are asserted to have equal data types; the new node is
 * created with arg1's data type and arg1's source location. */
struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
        struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3)
{
    /* Unused trailing operand slots are left zeroed. */
    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
    struct hlsl_type *result_type = arg1->data_type;

    args[0] = arg1;
    args[1] = arg2;
    args[2] = arg3;

    assert(hlsl_types_are_equal(result_type, arg2->data_type));
    assert(hlsl_types_are_equal(result_type, arg3->data_type));

    return hlsl_new_expr(ctx, op, args, result_type, &arg1->loc);
}
struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
{

View File

@ -1248,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
/* Creates an expression node for the three-operand operation "op". The definition asserts that
 * arg1, arg2 and arg3 have equal data types; the node takes arg1's data type and location. */
struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);

View File

@ -3141,6 +3141,67 @@ static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node
return true;
}
/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to
 * CMP instructions (only available in pixel shaders).
 * Based on the following equivalence:
 *   SLT(x, y)
 *     = (x < y) ? 1.0 : 0.0
 *     = ((x - y) >= 0) ? 0.0 : 1.0
 *     = CMP(x - y, 0.0, 1.0)
 *
 * Both operands are first cast to a float vector type built from the instruction's dimx, and
 * x - y is emitted as ADD(x, NEG(y)). All replacement instructions are appended to "block".
 *
 * Returns true when "instr" is an SLT expression and the replacement was emitted; returns false
 * when "instr" is not an SLT expression or when creating one of the new instructions fails. */
static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
    struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp;
    struct hlsl_constant_value zero_value, one_value;
    struct hlsl_type *float_type;
    struct hlsl_ir_expr *expr;

    if (instr->type != HLSL_IR_EXPR)
        return false;
    expr = hlsl_ir_expr(instr);
    if (expr->op != HLSL_OP2_SLT)
        return false;

    arg1 = expr->operands[0].node;
    arg2 = expr->operands[1].node;
    float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);

    if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, arg1_cast);

    if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, arg2_cast);

    /* x - y is expressed as x + (-y). */
    if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, neg);

    if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg)))
        return false;
    hlsl_block_add_instr(block, sub);

    memset(&zero_value, 0, sizeof(zero_value));
    if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, zero);

    /* Clear the whole value before filling in the float members, exactly as is done for
     * zero_value, so that no byte of the struct handed to hlsl_new_constant() is left
     * indeterminate. */
    memset(&one_value, 0, sizeof(one_value));
    one_value.u[0].f = 1.0f;
    one_value.u[1].f = 1.0f;
    one_value.u[2].f = 1.0f;
    one_value.u[3].f = 1.0f;
    if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
        return false;
    hlsl_block_add_instr(block, one);

    /* CMP(x - y, 0.0, 1.0): selects 0.0 when x - y >= 0, and 1.0 otherwise. */
    if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one)))
        return false;
    hlsl_block_add_instr(block, cmp);

    return true;
}
static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
struct hlsl_type *type = instr->data_type, *arg_type;
@ -5331,6 +5392,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_ir(ctx, lower_ceil, body);
lower_ir(ctx, lower_floor, body);
lower_ir(ctx, lower_comparison_operators, body);
if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
lower_ir(ctx, lower_slt, body);
}
if (profile->major_version < 2)

View File

@ -1,4 +1,4 @@
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 f;
float4 main() : sv_target
@ -8,28 +8,28 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.0 1.0 1.0 1.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
uniform 0 float4 0.0 1.0 0.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
uniform 0 float4 0.0 0.0 1.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
uniform 0 float4 0.0 0.0 0.0 1.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
uniform 0 float4 0.0 0.0 0.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0)
uniform 0 float4 -1.0 -1.0 -1.0 -1.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float f;
float4 main() : sv_target
@ -39,13 +39,13 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
uniform 0 float4 0.0 0.0 0.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0)
uniform 0 float4 -1.0 0.0 0.0 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)

View File

@ -13,7 +13,7 @@ draw quad
probe all rgba (8.0, 8.0, 8.0, 8.0)
[pixel shader todo(sm<4)]
[pixel shader]
float4 a, b;
float i;
@ -26,7 +26,7 @@ float4 main() : sv_target
uniform 0 float4 1.0 2.0 3.0 4.0
uniform 4 float4 5.0 6.0 7.0 8.0
uniform 8 float 2
todo(sm<4) draw quad
draw quad
probe all rgba (10.0, 10.0, 10.0, 10.0)
@ -44,7 +44,7 @@ draw quad
probe all rgba (3.0, 3.0, 3.0, 3.0)
[pixel shader todo(sm<4)]
[pixel shader]
float4 a;
float i;
@ -56,10 +56,10 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.0 2.0 3.0 4.0
uniform 4 float 0
todo(sm<4) draw quad
draw quad
probe all rgba (4.0, 4.0, 4.0, 4.0)
uniform 4 float 2
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 1.0)
@ -82,7 +82,7 @@ draw quad
probe all rgba (4.0, 4.0, 4.0, 4.0)
[pixel shader todo(sm<4)]
[pixel shader]
float4 a;
float i;
@ -99,5 +99,5 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.0 2.0 3.0 4.0
uniform 4 float 1
todo(sm<4) draw quad
draw quad
probe all rgba (2.0, 2.0, 2.0, 2.0)

View File

@ -1,5 +1,5 @@
% Check that -0.0f is not less than 0.0f
[pixel shader todo(sm<4)]
[pixel shader]
float a;
float4 main() : sv_target
@ -9,7 +9,7 @@ float4 main() : sv_target
[test]
uniform 0 float 0.0
todo(sm<4) draw quad
draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0)
@ -77,7 +77,7 @@ if(sm>=6) probe all rgba (1010101.0, 11110000.0, 1101001.0, 1.0)
shader model >= 2.0
shader model < 3.0
[pixel shader todo(sm<4)]
[pixel shader]
float4 a, b;
float4 main() : sv_target
@ -88,7 +88,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 1e-37 1e-37 1e+38 1e+38
uniform 4 float4 0 -1e-37 1e+38 -1e+38
todo(sm<4) draw quad
draw quad
probe all rgba (1.0, 1.0, 1.0, 0.0)
@ -96,7 +96,7 @@ probe all rgba (1.0, 1.0, 1.0, 0.0)
shader model >= 3.0
shader model < 4.0
[pixel shader todo(sm<4)]
[pixel shader]
float4 a, b;
float4 main() : sv_target

View File

@ -3,7 +3,7 @@
% implementations. DXIL defines intrinsics for inverse trig, to be implemented
% by the backend.
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 a;
float4 main() : sv_target
@ -32,7 +32,7 @@ uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4) draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0) 128
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 a;
float4 main() : sv_target

View File

@ -108,7 +108,7 @@ draw quad
probe all rgba (3.0, 4.0, 50.0, 60.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float i;
float4 main() : sv_target
@ -120,8 +120,8 @@ float4 main() : sv_target
[test]
uniform 0 float 2
todo(sm<4) draw quad
probe all rgba (8, 9, 10, 11)
draw quad
todo(sm<4) probe all rgba (8, 9, 10, 11)
[pixel shader todo(sm<4)]

View File

@ -25,7 +25,7 @@ todo(sm<4) draw quad
probe all rgba (9.0, 10.0, 11.0, 12.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float i;
float4 main() : SV_TARGET
@ -36,16 +36,16 @@ float4 main() : SV_TARGET
[test]
uniform 0 float 0
todo(sm<4) draw quad
draw quad
probe all rgba (11.0, 11.0, 11.0, 11.0)
uniform 0 float 1
todo(sm<4) draw quad
draw quad
probe all rgba (12.0, 12.0, 12.0, 12.0)
uniform 0 float 2
todo(sm<4) draw quad
draw quad
probe all rgba (13.0, 13.0, 13.0, 13.0)
uniform 0 float 3
todo(sm<4) draw quad
draw quad
probe all rgba (14.0, 14.0, 14.0, 14.0)

View File

@ -1,4 +1,4 @@
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 f, p;
float4 main() : sv_target
@ -9,7 +9,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 5.0 -2.6 3.0 2.0
uniform 4 float4 1.0 -4.3 3.0 4.0
todo(sm<4) draw quad
draw quad
probe all rgba (0.0, 0.0, 1.0, 1.0)

View File

@ -1,6 +1,6 @@
% Use a uniform to prevent the compiler from optimizing.
[pixel shader todo(sm<4)]
[pixel shader]
uniform float i;
float4 main() : SV_TARGET
{
@ -12,5 +12,5 @@ float4 main() : SV_TARGET
[test]
uniform 0 float 2
todo(sm<4) draw quad
draw quad
probe all rgba (0.5, 0.3, 0.8, 0.2)