From 2d91bd9200894d873ca90984348b907f09544f1d Mon Sep 17 00:00:00 2001 From: Francisco Casas Date: Wed, 15 Jan 2025 10:45:39 -0300 Subject: [PATCH] vkd3d-shader/hlsl: Properly lower casts to int for negative numbers. While it looks complicated, it is what fxc/d3dcompiler does. A shader as simple as: float4 f; float4 main() : sv_target { return (int4)f; } results in the following instructions: ps_2_0 def c1, 0, 1, 0, 0 frc r0, c0 cmp r1, -r0, c1.x, c1.y add r0, -r0, c0 mov r2, c0 cmp r1, r2, c1.x, r1 add r0, r0, r1 mov oC0, r0 --- libs/vkd3d-shader/hlsl_codegen.c | 54 +++++++++++++++++++++++++++--- tests/hlsl/cast-to-int.shader_test | 4 +-- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 299e3a7f..fd533cfb 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -3107,11 +3107,24 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) return false; } -/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). */ +/* Turn CAST to int or uint as follows: + * + * CAST(x) = x - FRACT(x) + extra + * + * where + * + * extra = FRACT(x) > 0 && x < 0 + * + * where the comparisons in the extra term are performed using CMP. + * + * A REINTERPRET (which is written as a mere MOV) is also applied to the final + * result for type consistency. 
+ */ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { + struct hlsl_ir_node *arg, *fract, *neg_fract, *has_fract, *floor, *extra, *res, *zero, *one; struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; - struct hlsl_ir_node *arg, *floor, *res; + struct hlsl_constant_value zero_value, one_value; struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) @@ -3126,12 +3139,45 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (arg->data_type->e.numeric.type != HLSL_TYPE_FLOAT && arg->data_type->e.numeric.type != HLSL_TYPE_HALF) return false; - if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(fract = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, arg, &instr->loc))) + return false; + hlsl_block_add_instr(block, fract); + + if (!(neg_fract = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, fract, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_fract); + + if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one))) + return false; + hlsl_block_add_instr(block, has_fract); + + if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, has_fract))) + return false; + hlsl_block_add_instr(block, extra); + + if (!(floor = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg, neg_fract))) return false; hlsl_block_add_instr(block, floor); + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, floor, extra))) + return false; + hlsl_block_add_instr(block, res); + memset(operands, 0, sizeof(operands)); - 
operands[0] = floor; + operands[0] = res; if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) return false; hlsl_block_add_instr(block, res); diff --git a/tests/hlsl/cast-to-int.shader_test b/tests/hlsl/cast-to-int.shader_test index 604ea263..9f4f7f7a 100644 --- a/tests/hlsl/cast-to-int.shader_test +++ b/tests/hlsl/cast-to-int.shader_test @@ -11,7 +11,7 @@ float4 main() : sv_target [test] uniform 0 float4 10.3 -11.6 12.8 13.1 draw quad -todo(sm<4) probe (0, 0) rgba(10, -11, 12, 0) +probe (0, 0) rgba(10, -11, 12, 0) [vertex shader] @@ -32,7 +32,7 @@ float4 main(float4 t1 : TEXCOORD1) : sv_target [test] uniform 0 float4 -0.4 -0.7 -12.8 14.8 draw quad -todo(sm<4) probe (0, 0) rgba(0, 0, -12, 14) +probe (0, 0) rgba(0, 0, -12, 14) [pixel shader todo(sm<4)]