vkd3d-shader/hlsl: Implement f16tof32 intrinsic.

Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/954
2025-12-15 08:03:30 -08:00 · 2024-07-06 12:24:22 -03:00 · 2024-08-08 23:47:42 +02:00
parent 693e89c74e
commit 8c3a5e5458
7 changed files with 212 additions and 0 deletions
--- a/Makefile.am
+++ b/Makefile.am
@@ -111,6 +111,7 @@ vkd3d_shader_tests = \
 	tests/hlsl/entry-point-semantics.shader_test \
 	tests/hlsl/exp.shader_test \
 	tests/hlsl/expr-indexing.shader_test \
+	tests/hlsl/f16tof32.shader_test \
 	tests/hlsl/faceforward.shader_test \
 	tests/hlsl/float-comparison.shader_test \
 	tests/hlsl/floor.shader_test \
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -2891,6 +2891,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
        [HLSL_OP1_DSY_COARSE]   = "dsy_coarse",
        [HLSL_OP1_DSY_FINE]     = "dsy_fine",
        [HLSL_OP1_EXP2]         = "exp2",
+        [HLSL_OP1_F16TOF32]     = "f16tof32",
        [HLSL_OP1_FLOOR]        = "floor",
        [HLSL_OP1_FRACT]        = "fract",
        [HLSL_OP1_LOG2]         = "log2",
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -657,6 +657,7 @@ enum hlsl_ir_expr_op
    HLSL_OP1_DSY_COARSE,
    HLSL_OP1_DSY_FINE,
    HLSL_OP1_EXP2,
+    HLSL_OP1_F16TOF32,
    HLSL_OP1_FLOOR,
    HLSL_OP1_FRACT,
    HLSL_OP1_LOG2,
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -3070,6 +3070,19 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
    return convert_args(ctx, params, type, loc);
 }

+/* Run convert_args() with the uint type matching the class and dimensions of the arguments' common type. */
+static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_type *common, *uint_type;
+
+    common = elementwise_intrinsic_get_common_type(ctx, params, loc);
+    if (!common)
+        return false;
+    uint_type = hlsl_get_numeric_type(ctx, common->class, HLSL_TYPE_UINT, common->dimx, common->dimy);
+    return convert_args(ctx, params, uint_type, loc);
+}
+
 static bool intrinsic_abs(struct hlsl_ctx *ctx,
        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -3775,6 +3788,21 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx,
    return add_user_call(ctx, func, params, loc);
 }

+/* f16tof32() intrinsic: uint-convert the arguments, then add an HLSL_OP1_F16TOF32 expression. */
+static bool intrinsic_f16tof32(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
+    struct hlsl_type *ret_type;
+
+    if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc))
+        return false;
+
+    args[0] = params->args[0];
+    ret_type = convert_numeric_type(ctx, args[0]->data_type, HLSL_TYPE_FLOAT);
+    return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, args, ret_type, loc);
+}
+
 static bool intrinsic_floor(struct hlsl_ctx *ctx,
        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
 {
@@ -4875,6 +4903,7 @@ intrinsic_functions[] =
    {"dot",                                 2, true,  intrinsic_dot},
    {"exp",                                 1, true,  intrinsic_exp},
    {"exp2",                                1, true,  intrinsic_exp2},
+    {"f16tof32",                            1, true,  intrinsic_f16tof32},
    {"faceforward",                         3, true,  intrinsic_faceforward},
    {"floor",                               1, true,  intrinsic_floor},
    {"fmod",                                2, true,  intrinsic_fmod},
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -6249,6 +6249,90 @@ static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *bloc
    }
 }

+/* Replace an HLSL_OP1_F16TOF32 expression with a call to soft_f16tof32(), an
+ * internal HLSL function decoding the half bit pattern with integer ops.
+ * Returns false for any other node, or if building the replacement fails. */
+static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *call, *rhs, *store;
+    struct hlsl_ir_function_decl *func;
+    unsigned int component_count;
+    struct hlsl_ir_load *load;
+    struct hlsl_ir_expr *expr;
+    char *body;
+
+    static const char template[] =
+    "typedef uint%u uintX;\n"
+    "float%u soft_f16tof32(uintX x)\n"
+    "{\n"
+    "    uintX mantissa = x & 0x3ff;\n"
+    "    uintX high2 = mantissa >> 8;\n"
+    "    uintX high2_check = high2 ? high2 : mantissa;\n"
+    "    uintX high6 = high2_check >> 4;\n"
+    "    uintX high6_check = high6 ? high6 : high2_check;\n"
+    "\n"
+    "    uintX high8 = high6_check >> 2;\n"
+    "    uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n"
+    "    uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n"
+    "    shift = high8 ? shift + 2 : shift;\n"
+    "    shift = high8_check ? shift + 1 : shift;\n"
+    "    shift = -shift + 10;\n"
+    "    shift = mantissa ? shift : 11;\n"
+    /* Align by 13 bits like low_3: the leading 1 lands on bit 23 and is masked off. */
+    "    uintX subnormal_mantissa = ((mantissa << shift) << 13) & 0x7fe000;\n"
+    "    uintX subnormal_exp = -(shift << 23) + 0x38800000;\n"
+    "    uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n"
+    "    uintX subnormal_or_zero = mantissa ? subnormal_val : 0;\n"
+    "\n"
+    "    uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n"
+    "\n"
+    "    uintX low_3 = (x << 13) & 0x7fe000;\n"
+    "    uintX normalized_val = exponent + low_3;\n"
+    "    uintX inf_nan_val = low_3 + 0x7f800000;\n"
+    "\n"
+    "    uintX exp_mask = 0x7c00;\n"
+    "    uintX is_inf_nan = (x & exp_mask) == exp_mask;\n"
+    "    uintX is_normalized = x & exp_mask;\n"
+    "\n"
+    "    uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n"
+    "    uintX exp_mantissa = (is_normalized ? check : subnormal_or_zero) & 0x7fffe000;\n"
+    "    uintX sign_bit = (x << 16) & 0x80000000;\n"
+    "\n"
+    "    return asfloat(exp_mantissa + sign_bit);\n"
+    "}\n";
+
+    if (node->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(node);
+    if (expr->op != HLSL_OP1_F16TOF32)
+        return false;
+
+    rhs = expr->operands[0].node;
+    component_count = hlsl_type_component_count(rhs->data_type);
+
+    if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
+        return false;
+
+    func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body);
+    vkd3d_free(body);
+    if (!func)
+        return false;
+
+    if (!(store = hlsl_new_simple_store(ctx, func->parameters.vars[0], rhs)))
+        return false;
+    hlsl_block_add_instr(block, store);
+
+    if (!(call = hlsl_new_call(ctx, func, &node->loc)))
+        return false;
+    hlsl_block_add_instr(block, call);
+
+    if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc)))
+        return false;
+    hlsl_block_add_instr(block, &load->node);
+
+    return true;
+}
+
 int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
        enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
 {
@@ -6269,6 +6353,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
    if (ctx->result)
        return ctx->result;

+    if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
+        lower_ir(ctx, lower_f16tof32, body);
+
    lower_return(ctx, entry_func, body, false);

    while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -5155,6 +5155,12 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex
            write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
            break;

+        case HLSL_OP1_F16TOF32:
+            assert(type_is_float(dst_type));
+            assert(hlsl_version_ge(tpf->ctx, 5, 0));
+            write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0);
+            break;
+
        case HLSL_OP1_FLOOR:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
--- a/tests/hlsl/f16tof32.shader_test
+++ b/tests/hlsl/f16tof32.shader_test
@@ -0,0 +1,87 @@
+[require]
+shader model >= 4.0
+
+[pixel shader]
+uniform uint a;
+
+float4 main() : sv_target
+{
+    return f16tof32(a);
+}
+
+[test]
+uniform 0 uint 0x5140
+todo(glsl) draw quad
+probe (0, 0) rgba(42.0, 42.0, 42.0, 42.0)
+
+% zero
+uniform 0 uint 0x0
+todo(glsl) draw quad
+probe (0, 0) rgba(0.0, 0.0, 0.0, 0.0)
+
+% negative zero
+uniform 0 uint 0x8000
+todo(glsl) draw quad
+probe (0, 0) rgba(-0.0, -0.0, -0.0, -0.0)
+
+% subnormal number
+uniform 0 uint 0x1
+todo(glsl) draw quad
+probe (0, 0) rgba(5.9604645e-08, 5.9604645e-08, 5.9604645e-08, 5.9604645e-08)
+
+% subnormal number
+uniform 0 uint 0x2
+todo(glsl) draw quad
+probe (0, 0) rgba(1.1920929e-07, 1.1920929e-07, 1.1920929e-07, 1.1920929e-07)
+
+% subnormal number
+uniform 0 uint 0x4
+todo(glsl) draw quad
+probe (0, 0) rgba(2.3841858e-07, 2.3841858e-07, 2.3841858e-07, 2.3841858e-07)
+
+% subnormal number
+uniform 0 uint 0x8
+todo(glsl) draw quad
+probe (0, 0) rgba(4.7683716e-07, 4.7683716e-07, 4.7683716e-07, 4.7683716e-07)
+
+% subnormal number
+uniform 0 uint 0x10
+todo(glsl) draw quad
+probe (0, 0) rgba(9.536743e-07, 9.536743e-07, 9.536743e-07, 9.536743e-07)
+
+% subnormal number
+uniform 0 uint 0x20
+todo(glsl) draw quad
+probe (0, 0) rgba(1.9073486e-06, 1.9073486e-06, 1.9073486e-06, 1.9073486e-06)
+
+% subnormal number
+uniform 0 uint 0x40
+todo(glsl) draw quad
+probe (0, 0) rgba(3.8146973e-06, 3.8146973e-06, 3.8146973e-06, 3.8146973e-06)
+
+% subnormal number
+uniform 0 uint 0x80
+todo(glsl) draw quad
+probe (0, 0) rgba(7.6293945e-06, 7.6293945e-06, 7.6293945e-06, 7.6293945e-06)
+
+% subnormal number
+uniform 0 uint 0x100
+todo(glsl) draw quad
+probe (0, 0) rgba(1.5258789e-05, 1.5258789e-05, 1.5258789e-05, 1.5258789e-05)
+
+% subnormal number
+uniform 0 uint 0x200
+todo(glsl) draw quad
+probe (0, 0) rgba(3.0517578e-05, 3.0517578e-05, 3.0517578e-05, 3.0517578e-05)
+
+% I'd love to use rgba probes here but msvc doesn't scanf infinity :(
+
+% positive infinity
+uniform 0 uint 0x7c00
+todo(glsl) draw quad
+probe (0, 0) rgbaui(0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000)
+
+% negative infinity
+uniform 0 uint 0xfc00
+todo(glsl) draw quad
+probe (0, 0) rgbaui(0xff800000, 0xff800000, 0xff800000, 0xff800000)