From 8c3a5e54585edf98e70bdf699d283e5d6d994f7a Mon Sep 17 00:00:00 2001
From: Victor Chiletto
Date: Sat, 6 Jul 2024 12:24:22 -0300
Subject: [PATCH] vkd3d-shader/hlsl: Implement f16tof32 intrinsic.

---
Notes (not part of the commit message):
  * The line structure of this patch was reconstructed from a copy in which
    all newlines and indentation had been collapsed; whitespace in context
    lines is a best effort, so "git apply --ignore-whitespace" may be needed.
  * lower_f16tof32() now releases the string returned by hlsl_sprintf_alloc()
    once the internal function has been compiled; previously it was never
    freed. This assumes hlsl_compile_internal_function() does not take
    ownership of its source argument.
  * Hunk sizes and the diffstat were adjusted for the five added lines; the
    "index" line of hlsl_codegen.c still carries the original blob ids.

 Makefile.am                      |  1 +
 libs/vkd3d-shader/hlsl.c         |  1 +
 libs/vkd3d-shader/hlsl.h         |  1 +
 libs/vkd3d-shader/hlsl.y         | 29 +++++++++++
 libs/vkd3d-shader/hlsl_codegen.c | 92 ++++++++++++++++++++++++++++++++
 libs/vkd3d-shader/tpf.c          |  6 +++
 tests/hlsl/f16tof32.shader_test  | 87 ++++++++++++++++++++++++++++++++
 7 files changed, 217 insertions(+)
 create mode 100644 tests/hlsl/f16tof32.shader_test

diff --git a/Makefile.am b/Makefile.am
index e5e008a2..b88c4f26 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -111,6 +111,7 @@ vkd3d_shader_tests = \
 	tests/hlsl/entry-point-semantics.shader_test \
 	tests/hlsl/exp.shader_test \
 	tests/hlsl/expr-indexing.shader_test \
+	tests/hlsl/f16tof32.shader_test \
 	tests/hlsl/faceforward.shader_test \
 	tests/hlsl/float-comparison.shader_test \
 	tests/hlsl/floor.shader_test \
diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index 5c07e574..6cad68ab 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -2891,6 +2891,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
         [HLSL_OP1_DSY_COARSE]   = "dsy_coarse",
         [HLSL_OP1_DSY_FINE]     = "dsy_fine",
         [HLSL_OP1_EXP2]         = "exp2",
+        [HLSL_OP1_F16TOF32]     = "f16tof32",
         [HLSL_OP1_FLOOR]        = "floor",
         [HLSL_OP1_FRACT]        = "fract",
         [HLSL_OP1_LOG2]         = "log2",
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 740e4e78..01fb9665 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -657,6 +657,7 @@ enum hlsl_ir_expr_op
     HLSL_OP1_DSY_COARSE,
     HLSL_OP1_DSY_FINE,
     HLSL_OP1_EXP2,
+    HLSL_OP1_F16TOF32,
     HLSL_OP1_FLOOR,
     HLSL_OP1_FRACT,
     HLSL_OP1_LOG2,
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index 02975628..aa01e11a 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -3070,6 +3070,19 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
     return convert_args(ctx, params, type, loc);
 }
 
+static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_type *type;
+
+    if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
+        return false;
+
+    type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy);
+
+    return convert_args(ctx, params, type, loc);
+}
+
 static bool intrinsic_abs(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -3775,6 +3788,21 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx,
     return add_user_call(ctx, func, params, loc);
 }
 
+static bool intrinsic_f16tof32(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
+    struct hlsl_type *type;
+
+    if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc))
+        return false;
+
+    type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT);
+
+    operands[0] = params->args[0];
+    return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc);
+}
+
 static bool intrinsic_floor(struct hlsl_ctx *ctx,
         const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -4875,6 +4903,7 @@ intrinsic_functions[] =
     {"dot",                                 2, true,  intrinsic_dot},
     {"exp",                                 1, true,  intrinsic_exp},
     {"exp2",                                1, true,  intrinsic_exp2},
+    {"f16tof32",                            1, true,  intrinsic_f16tof32},
     {"faceforward",                         3, true,  intrinsic_faceforward},
     {"floor",                               1, true,  intrinsic_floor},
     {"fmod",                                2, true,  intrinsic_fmod},
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 4d0bd5fe..9a076c4a 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -6249,6 +6249,95 @@ static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *bloc
     }
 }
 
+/* Replace an HLSL_OP1_F16TOF32 expression with a call to an internal HLSL
+ * function that performs the half-to-float conversion using integer
+ * arithmetic. The caller only applies this for 4.0 <= shader model < 5.0. */
+static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *call, *rhs, *store;
+    struct hlsl_ir_function_decl *func;
+    unsigned int component_count;
+    struct hlsl_ir_load *load;
+    struct hlsl_ir_expr *expr;
+    struct hlsl_ir_var *lhs;
+    char *body;
+
+    static const char template[] =
+        "typedef uint%u uintX;\n"
+        "float%u soft_f16tof32(uintX x)\n"
+        "{\n"
+        "    uintX mantissa = x & 0x3ff;\n"
+        "    uintX high2 = mantissa >> 8;\n"
+        "    uintX high2_check = high2 ? high2 : mantissa;\n"
+        "    uintX high6 = high2_check >> 4;\n"
+        "    uintX high6_check = high6 ? high6 : high2_check;\n"
+        "\n"
+        "    uintX high8 = high6_check >> 2;\n"
+        "    uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n"
+        "    uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n"
+        "    shift = high8 ? shift + 2 : shift;\n"
+        "    shift = high8_check ? shift + 1 : shift;\n"
+        "    shift = -shift + 10;\n"
+        "    shift = mantissa ? shift : 11;\n"
+        "    uintX subnormal_mantissa = ((mantissa << shift) << 23) & 0x7fe000;\n"
+        "    uintX subnormal_exp = -(shift << 23) + 0x38800000;\n"
+        "    uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n"
+        "    uintX subnormal_or_zero = mantissa ? subnormal_val : 0;\n"
+        "\n"
+        "    uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n"
+        "\n"
+        "    uintX low_3 = (x << 13) & 0x7fe000;\n"
+        "    uintX normalized_val = exponent + low_3;\n"
+        "    uintX inf_nan_val = low_3 + 0x7f800000;\n"
+        "\n"
+        "    uintX exp_mask = 0x7c00;\n"
+        "    uintX is_inf_nan = (x & exp_mask) == exp_mask;\n"
+        "    uintX is_normalized = x & exp_mask;\n"
+        "\n"
+        "    uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n"
+        "    uintX exp_mantissa = (is_normalized ? check : subnormal_or_zero) & 0x7fffe000;\n"
+        "    uintX sign_bit = (x << 16) & 0x80000000;\n"
+        "\n"
+        "    return asfloat(exp_mantissa + sign_bit);\n"
+        "}\n";
+
+
+    if (node->type != HLSL_IR_EXPR)
+        return false;
+
+    expr = hlsl_ir_expr(node);
+
+    if (expr->op != HLSL_OP1_F16TOF32)
+        return false;
+
+    rhs = expr->operands[0].node;
+    component_count = hlsl_type_component_count(rhs->data_type);
+
+    if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
+        return false;
+
+    func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body);
+    vkd3d_free(body);
+    if (!func)
+        return false;
+
+    lhs = func->parameters.vars[0];
+
+    if (!(store = hlsl_new_simple_store(ctx, lhs, rhs)))
+        return false;
+    hlsl_block_add_instr(block, store);
+
+    if (!(call = hlsl_new_call(ctx, func, &node->loc)))
+        return false;
+    hlsl_block_add_instr(block, call);
+
+    if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc)))
+        return false;
+    hlsl_block_add_instr(block, &load->node);
+
+    return true;
+}
+
 int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
         enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
 {
@@ -6269,6 +6358,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
     if (ctx->result)
         return ctx->result;
 
+    if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
+        lower_ir(ctx, lower_f16tof32, body);
+
     lower_return(ctx, entry_func, body, false);
 
     while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index e18bfd2e..27516e4c 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -5155,6 +5155,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex
             write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
             break;
 
+        case HLSL_OP1_F16TOF32:
+            assert(type_is_float(dst_type));
+            assert(hlsl_version_ge(tpf->ctx, 5, 0));
+            write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0);
+            break;
+
         case HLSL_OP1_FLOOR:
             assert(type_is_float(dst_type));
             write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
diff --git a/tests/hlsl/f16tof32.shader_test b/tests/hlsl/f16tof32.shader_test
new file mode 100644
index 00000000..22054f2f
--- /dev/null
+++ b/tests/hlsl/f16tof32.shader_test
@@ -0,0 +1,87 @@
+[require]
+shader model >= 4.0
+
+[pixel shader]
+uniform uint a;
+
+float4 main() : sv_target
+{
+    return f16tof32(a);
+}
+
+[test]
+uniform 0 uint 0x5140
+todo(glsl) draw quad
+probe (0, 0) rgba(42.0, 42.0, 42.0, 42.0)
+
+% zero
+uniform 0 uint 0x0
+todo(glsl) draw quad
+probe (0, 0) rgba(0.0, 0.0, 0.0, 0.0)
+
+% negative zero
+uniform 0 uint 0x8000
+todo(glsl) draw quad
+probe (0, 0) rgba(-0.0, -0.0, -0.0, -0.0)
+
+% subnormal number
+uniform 0 uint 0x1
+todo(glsl) draw quad
+probe (0, 0) rgba(5.9604645e-08, 5.9604645e-08, 5.9604645e-08, 5.9604645e-08)
+
+% subnormal number
+uniform 0 uint 0x2
+todo(glsl) draw quad
+probe (0, 0) rgba(1.1920929e-07, 1.1920929e-07, 1.1920929e-07, 1.1920929e-07)
+
+% subnormal number
+uniform 0 uint 0x4
+todo(glsl) draw quad
+probe (0, 0) rgba(2.3841858e-07, 2.3841858e-07, 2.3841858e-07, 2.3841858e-07)
+
+% subnormal number
+uniform 0 uint 0x8
+todo(glsl) draw quad
+probe (0, 0) rgba(4.7683716e-07, 4.7683716e-07, 4.7683716e-07, 4.7683716e-07)
+
+% subnormal number
+uniform 0 uint 0x10
+todo(glsl) draw quad
+probe (0, 0) rgba(9.536743e-07, 9.536743e-07, 9.536743e-07, 9.536743e-07)
+
+% subnormal number
+uniform 0 uint 0x20
+todo(glsl) draw quad
+probe (0, 0) rgba(1.9073486e-06, 1.9073486e-06, 1.9073486e-06, 1.9073486e-06)
+
+% subnormal number
+uniform 0 uint 0x40
+todo(glsl) draw quad
+probe (0, 0) rgba(3.8146973e-06, 3.8146973e-06, 3.8146973e-06, 3.8146973e-06)
+
+% subnormal number
+uniform 0 uint 0x80
+todo(glsl) draw quad
+probe (0, 0) rgba(7.6293945e-06, 7.6293945e-06, 7.6293945e-06, 7.6293945e-06)
+
+% subnormal number
+uniform 0 uint 0x100
+todo(glsl) draw quad
+probe (0, 0) rgba(1.5258789e-05, 1.5258789e-05, 1.5258789e-05, 1.5258789e-05)
+
+% subnormal number
+uniform 0 uint 0x200
+todo(glsl) draw quad
+probe (0, 0) rgba(3.0517578e-05, 3.0517578e-05, 3.0517578e-05, 3.0517578e-05)
+
+% I'd love to use rgba probes here but msvc doesn't scanf infinity :(
+
+% positive infinity
+uniform 0 uint 0x7c00
+todo(glsl) draw quad
+probe (0, 0) rgbaui(0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000)
+
+% negative infinity
+uniform 0 uint 0xfc00
+todo(glsl) draw quad
+probe (0, 0) rgbaui(0xff800000, 0xff800000, 0xff800000, 0xff800000)