vkd3d-shader/hlsl: Ensure that TERNARY condition is always bool.

Also, properly casting it to float in lower_ternary() for SM1
avoids creating ABS and NEG on bool types.
This commit is contained in:
Francisco Casas 2024-03-01 16:01:03 -03:00 committed by Alexandre Julliard
parent 9c0d04c862
commit 19fd43214b
Notes: Alexandre Julliard 2024-04-09 15:44:37 -05:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Zebediah Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/744
11 changed files with 114 additions and 122 deletions

View File

@ -601,12 +601,10 @@ enum hlsl_ir_expr_op
/* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy,
* then adds c. */
HLSL_OP3_DP2ADD,
/* MOVC(a, b, c) returns c if a is bitwise zero and b otherwise.
* TERNARY(a, b, c) returns c if a == 0 and b otherwise.
* They differ for floating point numbers, because
* -0.0 == 0.0, but it is not bitwise zero. CMP(a, b, c) returns b
if a >= 0, and c otherwise. It's used only for SM1-SM3 targets, while
SM4+ is using MOVC in such cases. */
/* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean.
* MOVC(a, b, c) returns 'c' if 'a' is bitwise zero and 'b' otherwise.
* CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets,
* while SM4+ uses MOVC in such cases. */
HLSL_OP3_CMP,
HLSL_OP3_MOVC,
HLSL_OP3_TERNARY,

View File

@ -4405,26 +4405,34 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
}
else if (common_type->dimx == 1 && common_type->dimy == 1)
else
{
common_type = hlsl_get_numeric_type(ctx, cond_type->class,
common_type->base_type, cond_type->dimx, cond_type->dimy);
}
else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy)
{
/* This condition looks wrong but is correct.
* floatN is compatible with float1xN, but not with floatNx1. */
cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
cond_type->dimx, cond_type->dimy);
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
struct vkd3d_string_buffer *cond_string, *value_string;
if (common_type->dimx == 1 && common_type->dimy == 1)
{
common_type = hlsl_get_numeric_type(ctx, cond_type->class,
common_type->base_type, cond_type->dimx, cond_type->dimy);
}
else if (cond_type->dimx != common_type->dimx || cond_type->dimy != common_type->dimy)
{
/* This condition looks wrong but is correct.
* floatN is compatible with float1xN, but not with floatNx1. */
cond_string = hlsl_type_to_string(ctx, cond_type);
value_string = hlsl_type_to_string(ctx, common_type);
if (cond_string && value_string)
hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Ternary condition type '%s' is not compatible with value type '%s'.",
cond_string->buffer, value_string->buffer);
hlsl_release_string_buffer(ctx, cond_string);
hlsl_release_string_buffer(ctx, value_string);
struct vkd3d_string_buffer *cond_string, *value_string;
cond_string = hlsl_type_to_string(ctx, cond_type);
value_string = hlsl_type_to_string(ctx, common_type);
if (cond_string && value_string)
hlsl_error(ctx, &first->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
"Ternary condition type '%s' is not compatible with value type '%s'.",
cond_string->buffer, value_string->buffer);
hlsl_release_string_buffer(ctx, cond_string);
hlsl_release_string_buffer(ctx, value_string);
}
}
if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc)))
@ -4449,9 +4457,16 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block,
hlsl_release_string_buffer(ctx, second_string);
}
cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
cond_type->dimx, cond_type->dimy);
if (!(cond = add_implicit_conversion(ctx, block, cond, cond_type, &cond->loc)))
return false;
common_type = first->data_type;
}
assert(cond->data_type->base_type == HLSL_TYPE_BOOL);
args[0] = cond;
args[1] = first;
args[2] = second;

View File

@ -2958,8 +2958,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st
static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement;
struct hlsl_ir_node *zero, *cond, *first, *second;
struct hlsl_constant_value zero_value = { 0 };
struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg;
struct hlsl_ir_expr *expr;
struct hlsl_type *type;
@ -2980,18 +2979,22 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
return false;
}
assert(cond->data_type->base_type == HLSL_TYPE_BOOL);
if (ctx->profile->major_version < 4)
{
struct hlsl_ir_node *abs, *neg;
type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT,
instr->data_type->dimx, instr->data_type->dimy);
if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc)))
if (!(float_cond = hlsl_new_cast(ctx, cond, type, &instr->loc)))
return false;
hlsl_block_add_instr(block, abs);
hlsl_block_add_instr(block, float_cond);
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc)))
if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, float_cond, &instr->loc)))
return false;
hlsl_block_add_instr(block, neg);
memset(operands, 0, sizeof(operands));
operands[0] = neg;
operands[1] = second;
operands[2] = first;
@ -3000,21 +3003,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
}
else
{
if (cond->data_type->base_type == HLSL_TYPE_FLOAT)
{
if (!(zero = hlsl_new_constant(ctx, cond->data_type, &zero_value, &instr->loc)))
return false;
hlsl_block_add_instr(block, zero);
operands[0] = zero;
operands[1] = cond;
type = cond->data_type;
type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->dimx, type->dimy);
if (!(cond = hlsl_new_expr(ctx, HLSL_OP2_NEQUAL, operands, type, &instr->loc)))
return false;
hlsl_block_add_instr(block, cond);
}
memset(operands, 0, sizeof(operands));
operands[0] = cond;
operands[1] = first;
@ -3319,11 +3307,21 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs,
struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false)
{
struct hlsl_type *cond_type = condition->data_type;
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
struct hlsl_ir_node *cond;
assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type));
if (cond_type->base_type != HLSL_TYPE_BOOL)
{
cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL, cond_type->dimx, cond_type->dimy);
if (!(condition = hlsl_new_cast(ctx, condition, cond_type, &condition->loc)))
return NULL;
hlsl_block_add_instr(instrs, condition);
}
operands[0] = condition;
operands[1] = if_true;
operands[2] = if_false;
@ -5400,11 +5398,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
lower_ir(ctx, lower_narrowing_casts, body);
lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_int_dot, body);
lower_ir(ctx, lower_int_division, body);
lower_ir(ctx, lower_int_modulus, body);
lower_ir(ctx, lower_int_abs, body);
lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_float_modulus, body);
hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL);
do

View File

@ -1177,30 +1177,11 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
assert(dst_type->base_type == src2->node.data_type->base_type);
assert(dst_type->base_type == src3->node.data_type->base_type);
assert(src1->node.data_type->base_type == HLSL_TYPE_BOOL);
for (k = 0; k < dst_type->dimx; ++k)
{
switch (src1->node.data_type->base_type)
{
case HLSL_TYPE_FLOAT:
case HLSL_TYPE_HALF:
dst->u[k] = src1->value.u[k].f != 0.0f ? src2->value.u[k] : src3->value.u[k];
break;
dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k];
case HLSL_TYPE_DOUBLE:
dst->u[k] = src1->value.u[k].d != 0.0 ? src2->value.u[k] : src3->value.u[k];
break;
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
case HLSL_TYPE_BOOL:
dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k];
break;
default:
vkd3d_unreachable();
}
}
return true;
}

View File

@ -13,7 +13,7 @@ uniform 0 float4 5.0 15.0 0.0 0.0
todo(glsl) draw quad
probe all rgba (20.0, -10.0, 75.0, 0.33333333) 1
[pixel shader todo(sm<4)]
[pixel shader]
uniform float2 a;
float4 main() : SV_TARGET
@ -25,10 +25,10 @@ float4 main() : SV_TARGET
[test]
uniform 0 float4 5.0 15.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (5.0, 5.0, -5.0, 3.0) 1
[pixel shader todo(sm<4)]
[pixel shader]
uniform float2 a;
float4 main() : SV_TARGET
@ -40,10 +40,10 @@ float4 main() : SV_TARGET
[test]
uniform 0 float4 42.0 5.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (2.0, -2.0, 2.0, -2.0) 16
[pixel shader todo(sm<4)]
[pixel shader]
uniform float2 a;
float4 main() : SV_TARGET
@ -55,10 +55,10 @@ float4 main() : SV_TARGET
[test]
uniform 0 float4 45.0 5.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0)
[pixel shader todo(sm<4)]
[pixel shader]
float4 x, y;
float4 main() : sv_target
@ -69,7 +69,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 5.0 -42.1 4.0 45.0
uniform 4 float4 15.0 -5.0 4.1 5.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (5.0, -2.1, 4.0, 0.0) 6
[require]

View File

@ -13,7 +13,7 @@ todo(glsl) draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 f;
float4 main() : sv_target
@ -55,7 +55,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 0.0 1.5 1.5 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
% SM1-3 apparently treats '0/0' as zero.
if(sm<4) todo probe all rgba (1010101.0, 11001100.0, 1101001.0, 11.0)
% SM4-5 optimises away the 'not' by inverting the condition, even though this is invalid for NaN.

View File

@ -1,4 +1,4 @@
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 u;
float4 main() : sv_target
@ -8,13 +8,13 @@ float4 main() : sv_target
[test]
uniform 0 float4 -0.5 6.5 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-0.5, 0.0, 0.0, 0.0) 4
uniform 0 float4 1.1 0.3 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.2, 0.0, 0.0, 0.0) 4
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 u;
float4 main() : sv_target
@ -24,8 +24,8 @@ float4 main() : sv_target
[test]
uniform 0 float4 -0.5 6.5 2.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-0.5, 0.5, 0.0, 0.0) 4
uniform 0 float4 1.1 0.3 3.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.1, 0.3, 0.0, 0.0) 4

View File

@ -92,7 +92,7 @@ todo(glsl) draw quad
probe all rgba (31416.0, 0.0, 0.0, 0.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 a;
float4 main() : sv_target
@ -102,26 +102,26 @@ float4 main() : sv_target
[test]
uniform 0 float4 -1.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-0.785409629, 0.0, 0.0, 0.0) 512
uniform 0 float4 -0.5 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-0.4636476, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.5 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.4636476, 0.0, 0.0, 0.0) 256
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.785409629, 0.0, 0.0, 0.0) 512
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 a;
float4 main() : sv_target
@ -133,64 +133,64 @@ float4 main() : sv_target
[test]
% Non-degenerate cases
uniform 0 float4 1.0 1.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.785385, 0.0, 0.0, 0.0) 512
uniform 0 float4 5.0 -5.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (2.356194, 0.0, 0.0, 0.0) 256
uniform 0 float4 -3.0 -3.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-2.356194, 0.0, 0.0, 0.0) 256
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 -1.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 1.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.0, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 -1.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256
% Degenerate cases
uniform 0 float4 0.00001 0.00002 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.463647, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.00001 -0.00002 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (2.677945, 0.0, 0.0, 0.0) 256
uniform 0 float4 -0.00001 100000.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-0.000000000099986595, 0.0, 0.0, 0.0) 2048
uniform 0 float4 10000000.0 0.00000001 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
% Negative zero should be treated the same as
% positive zero.
uniform 0 float4 1000000000.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 1000000000.0 -0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.570796, 0.0, 0.0, 0.0) 256
uniform 0 float4 0.0 -1.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256
uniform 0 float4 -0.0 -1.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (3.1415927, 0.0, 0.0, 0.0) 256

View File

@ -1,4 +1,4 @@
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 u;
float4 main() : sv_target
@ -8,20 +8,20 @@ float4 main() : sv_target
[test]
uniform 0 float4 -0.1 10.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.0, 0.0, 0.0, 1.0)
[test]
uniform 0 float4 1.2 -0.1 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.0, 1.2, 0.0, 1.0)
[test]
uniform 0 float4 1.2 2.0 3.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.0, 1.2, 8.0, 1.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 u;
float4 main() : sv_target
@ -31,7 +31,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.2 2.0 3.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (2.0, 2.4, 16.0, 2.0)
[pixel shader fail]

View File

@ -3,7 +3,7 @@
shader model < 6.0
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 x;
float4 main() : sv_target
@ -13,14 +13,14 @@ float4 main() : sv_target
[test]
uniform 0 float4 2.0 3.0 4.0 5.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (2.0, 3.0, 4.0, 5.0)
uniform 0 float4 0.0 10.0 11.0 12.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (-1.0, 9.0, 10.0, 11.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float4 x;
float4 main() : sv_target
@ -35,11 +35,11 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.1 3.0 4.0 5.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.1, 2.0, 0.0, 0.0)
[pixel shader todo(sm<4)]
[pixel shader]
float4 f;
float4 main() : sv_target
@ -51,7 +51,7 @@ float4 main() : sv_target
[test]
uniform 0 float4 1.0 0.0 0.0 0.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.5, 0.6, 0.7, 0.0)
@ -246,7 +246,7 @@ todo(glsl) draw quad
probe all rgba (3.0, 3.0, 3.0, 3.0)
[pixel shader todo(sm<4)]
[pixel shader]
uniform float cond;
uniform float4 a, b;
@ -260,7 +260,7 @@ float4 main() : sv_target
uniform 0 float4 1.0 0.0 0.0 0.0
uniform 4 float4 1.0 2.0 3.0 4.0
uniform 8 float4 5.0 6.0 7.0 8.0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (1.0, 2.0, 3.0, 4.0)

View File

@ -88,7 +88,7 @@ probe all rgba (1.0, 1.0, 1.0, 1.0)
% The ternary operator works differently in sm6. See sm6-ternary.shader_test.
shader model < 6.0
[vertex shader todo(sm<4)]
[vertex shader]
int a, b, c;
void main(out float4 res : COLOR1, in float4 pos : position, out float4 out_pos : sv_position)
@ -103,11 +103,11 @@ if(sm<4) uniform 0 float 0
if(sm<4) uniform 4 float 100
if(sm<4) uniform 8 float 200
if(sm>=4) uniform 0 int4 0 100 200 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.2, 0.2, 0.2, 0.2)
if(sm<4) uniform 0 float -4
if(sm<4) uniform 4 float 100
if(sm<4) uniform 8 float 200
if(sm>=4) uniform 0 int4 -4 100 200 0
todo(sm<4 | glsl) draw quad
todo(glsl) draw quad
probe all rgba (0.1, 0.1, 0.1, 0.1)