From cf3e251a9f6ecd1c8654dd29a9111244953988e0 Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Thu, 17 Oct 2024 23:21:18 +0200 Subject: [PATCH] vkd3d-shader/hlsl: Implement the f32tof16() intrinsic. Signed-off-by: Nikolay Sivov --- Makefile.am | 1 + libs/vkd3d-shader/hlsl.c | 1 + libs/vkd3d-shader/hlsl.h | 1 + libs/vkd3d-shader/hlsl.y | 16 ++++++ libs/vkd3d-shader/hlsl_codegen.c | 75 +++++++++++++++++++++++++++ libs/vkd3d-shader/tpf.c | 6 +++ tests/hlsl/f32tof16.shader_test | 87 ++++++++++++++++++++++++++++++++ 7 files changed, 187 insertions(+) create mode 100644 tests/hlsl/f32tof16.shader_test diff --git a/Makefile.am b/Makefile.am index dbfb1f60..988c31c4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -121,6 +121,7 @@ vkd3d_shader_tests = \ tests/hlsl/exp.shader_test \ tests/hlsl/expr-indexing.shader_test \ tests/hlsl/f16tof32.shader_test \ + tests/hlsl/f32tof16.shader_test \ tests/hlsl/faceforward.shader_test \ tests/hlsl/ffp-point-size.shader_test \ tests/hlsl/float-comparison.shader_test \ diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index 6ad0117f..c7aa148e 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -3164,6 +3164,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_F16TOF32] = "f16tof32", + [HLSL_OP1_F32TOF16] = "f32tof16", [HLSL_OP1_FLOOR] = "floor", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index b20f9646..2d5c2e8d 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -693,6 +693,7 @@ enum hlsl_ir_expr_op HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, HLSL_OP1_F16TOF32, + HLSL_OP1_F32TOF16, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, HLSL_OP1_LOG2, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 58c6071c..49cff4c8 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -4024,6 +4024,21 @@ static bool 
intrinsic_f16tof32(struct hlsl_ctx *ctx,
     return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc);
 }
 
+static bool intrinsic_f32tof16(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
+    struct hlsl_type *type;
+
+    if (!elementwise_intrinsic_float_convert_args(ctx, params, loc))
+        return false;
+
+    type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT);
+
+    operands[0] = params->args[0];
+    return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc);
+}
+
 static bool intrinsic_floor(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -5199,6 +5214,7 @@ intrinsic_functions[] =
     {"exp", 1, true, intrinsic_exp},
     {"exp2", 1, true, intrinsic_exp2},
     {"f16tof32", 1, true, intrinsic_f16tof32},
+    {"f32tof16", 1, true, intrinsic_f32tof16},
     {"faceforward", 3, true, intrinsic_faceforward},
     {"floor", 1, true, intrinsic_floor},
     {"fmod", 2, true, intrinsic_fmod},
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 3da96745..6e1b2b43 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -7859,6 +7859,93 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru
     return true;
 }
 
+/* Lower an HLSL_OP1_F32TOF16 expression to a call to an internal HLSL function,
+ * "soft_f32tof16", which performs the conversion using integer operations only.
+ *
+ * Returns false if "node" is not an f32tof16 expression or if building the
+ * replacement fails. Otherwise the replacement instructions are appended to
+ * "block" and true is returned. */
+static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *call, *rhs, *store;
+    struct hlsl_ir_function_decl *func;
+    unsigned int component_count;
+    struct hlsl_ir_load *load;
+    struct hlsl_ir_expr *expr;
+    struct hlsl_ir_var *lhs;
+    char *body;
+
+    /* Both "%u" conversions receive the operand's component count. The helper
+     * keeps the sign in bit 15, sends inputs with all exponent bits set through
+     * the "nan16" path, and yields 0x7bff for magnitudes above 0x47ffe000. */
+    static const char template[] =
+    "typedef uint%u uintX;\n"
+    "uintX soft_f32tof16(float%u x)\n"
+    "{\n"
+    " uintX v = asuint(x);\n"
+    " uintX v_abs = v & 0x7fffffff;\n"
+    " uintX sign_bit = (v >> 16) & 0x8000;\n"
+    " uintX exp = (v >> 23) & 0xff;\n"
+    " uintX mantissa = v & 0x7fffff;\n"
+    " uintX nan16;\n"
+    " uintX nan = (v & 0x7f800000) == 0x7f800000;\n"
+    " uintX val;\n"
+    "\n"
+    " val = 113 - exp;\n"
+    " val = (mantissa + 0x800000) >> val;\n"
+    " val >>= 13;\n"
+    "\n"
+    " val = (exp - 127) < -38 ? 0 : val;\n"
+    "\n"
+    " val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n"
+    " val = v_abs > 0x47ffe000 ? 0x7bff : val;\n"
+    "\n"
+    " nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n"
+    " val = nan ? nan16 : val;\n"
+    "\n"
+    " return (val & 0x7fff) + sign_bit;\n"
+    "}\n";
+
+    if (node->type != HLSL_IR_EXPR)
+        return false;
+
+    expr = hlsl_ir_expr(node);
+
+    if (expr->op != HLSL_OP1_F32TOF16)
+        return false;
+
+    rhs = expr->operands[0].node;
+    component_count = hlsl_type_component_count(rhs->data_type);
+
+    if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
+        return false;
+
+    /* The generated source is only needed while it is being compiled; release
+     * it whether or not compilation succeeded, so that it is not leaked. */
+    func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body);
+    vkd3d_free(body);
+    if (!func)
+        return false;
+
+    /* Feed the operand to the helper through its only parameter. */
+    lhs = func->parameters.vars[0];
+
+    if (!(store = hlsl_new_simple_store(ctx, lhs, rhs)))
+        return false;
+    hlsl_block_add_instr(block, store);
+
+    if (!(call = hlsl_new_call(ctx, func, &node->loc)))
+        return false;
+    hlsl_block_add_instr(block, call);
+
+    /* The helper's return variable holds the converted value. */
+    if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc)))
+        return false;
+    hlsl_block_add_instr(block, &load->node);
+
+    return true;
+}
+
 static void process_entry_function(struct hlsl_ctx *ctx,
         const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func)
 {
@@ -7887,7 +7974,10 @@ static void process_entry_function(struct hlsl_ctx *ctx,
         return;
 
     if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
+    {
         lower_ir(ctx, lower_f16tof32, body);
+        lower_ir(ctx, lower_f32tof16, body);
+    }
 
     lower_return(ctx, entry_func, body, false);
 
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index 835a3846..b4bf75cc 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -5693,6 +5693,12 @@ static void
write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_
             write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0);
             break;
 
+        case HLSL_OP1_F32TOF16:
+            VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT);
+            VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0));
+            write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, &expr->node, arg1, 0);
+            break;
+
         case HLSL_OP1_FLOOR:
             VKD3D_ASSERT(type_is_float(dst_type));
             write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
diff --git a/tests/hlsl/f32tof16.shader_test b/tests/hlsl/f32tof16.shader_test
new file mode 100644
index 00000000..ef8efe9b
--- /dev/null
+++ b/tests/hlsl/f32tof16.shader_test
@@ -0,0 +1,87 @@
+[require]
+shader model >= 4.0
+
+[pixel shader]
+uniform float a;
+
+uint4 main() : sv_target
+{
+    return f32tof16(a);
+}
+
+[test]
+uniform 0 float 42.0
+draw quad
+probe (0, 0) rgbaui (0x5140, 0x5140, 0x5140, 0x5140)
+
+% zero
+uniform 0 float 0.0
+draw quad
+probe (0, 0) rgbaui (0x0, 0x0, 0x0, 0x0)
+
+% negative zero: float32 bits 0x80000000, expected half bits 0x8000
+uniform 0 uint 0x80000000
+draw quad
+probe (0, 0) rgbaui (0x8000, 0x8000, 0x8000, 0x8000)
+
+% subnormal number
+uniform 0 float 5.9604645e-08
+draw quad
+probe (0, 0) rgbaui (0x1, 0x1, 0x1, 0x1)
+
+% subnormal number
+uniform 0 float 1.1920929e-07
+draw quad
+probe (0, 0) rgbaui (0x2, 0x2, 0x2, 0x2)
+
+% subnormal number
+uniform 0 float 2.3841858e-07
+draw quad
+probe (0, 0) rgbaui (0x4, 0x4, 0x4, 0x4)
+
+% subnormal number
+uniform 0 float 4.7683716e-07
+draw quad
+probe (0, 0) rgbaui (0x8, 0x8, 0x8, 0x8)
+
+% subnormal number
+uniform 0 float 9.536743e-07
+draw quad
+probe (0, 0) rgbaui (0x10, 0x10, 0x10, 0x10)
+
+% subnormal number
+uniform 0 float 1.9073486e-06
+draw quad
+probe (0, 0) rgbaui (0x20, 0x20, 0x20, 0x20)
+
+% subnormal number
+uniform 0 float 3.8146973e-06
+draw quad
+probe (0, 0) rgbaui (0x40, 0x40, 0x40, 0x40)
+
+% subnormal number
+uniform 0 float 7.6293945e-06
+draw quad
+probe (0, 0) rgbaui (0x80, 0x80, 0x80, 0x80)
+
+% subnormal number
+uniform 0 float 1.5258789e-05
+draw quad
+probe (0, 0) rgbaui (0x100, 0x100, 0x100, 0x100)
+
+% subnormal number
+uniform 0 float 3.0517578e-05
+draw quad
+probe (0, 0) rgbaui (0x200, 0x200, 0x200, 0x200)
+
+% Infinities are given as raw uint bit patterns; msvc doesn't scanf infinity :(
+
+% positive infinity
+uniform 0 uint 0x7f800000
+draw quad
+probe (0, 0) rgbaui (0x7c00, 0x7c00, 0x7c00, 0x7c00)
+
+% negative infinity
+uniform 0 uint 0xff800000
+draw quad
+probe (0, 0) rgbaui (0xfc00, 0xfc00, 0xfc00, 0xfc00)