From 63fbe161f26a00e5a94cd0f52c3bd59cf2467a74 Mon Sep 17 00:00:00 2001
From: Shaun Ren
Date: Fri, 20 Dec 2024 15:13:27 -0500
Subject: [PATCH] vkd3d-shader/hlsl: Implement the InterlockedMax() intrinsic.

---
 libs/vkd3d-shader/hlsl.c         |  1 +
 libs/vkd3d-shader/hlsl.h         |  1 +
 libs/vkd3d-shader/hlsl.y         | 23 ++++++++++++++++++++++-
 libs/vkd3d-shader/hlsl_codegen.c | 11 +++++++++++
 libs/vkd3d-shader/tpf.c          |  4 ++++
 5 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index 6d1626cb..d0405975 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -3510,6 +3510,7 @@ static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct
         [HLSL_INTERLOCKED_AND] = "and",
         [HLSL_INTERLOCKED_CMP_EXCH] = "cmp_exch",
         [HLSL_INTERLOCKED_EXCH] = "exch",
+        [HLSL_INTERLOCKED_MAX] = "max",
     };
 
     VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names));
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 0c02a2e2..c5703417 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -957,6 +957,7 @@ enum hlsl_interlocked_op
     HLSL_INTERLOCKED_AND,
     HLSL_INTERLOCKED_CMP_EXCH,
     HLSL_INTERLOCKED_EXCH,
+    HLSL_INTERLOCKED_MAX,
 };
 
 /* Represents an interlocked operation.
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index 5486e785..5f9d47f2 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -5429,7 +5429,21 @@ static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op
 
     /* Interlocked*() functions always take uint for the value parameters,
      * except for InterlockedMax()/InterlockedMin(). */
-    val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT);
+    if (op == HLSL_INTERLOCKED_MAX)
+    {
+        enum hlsl_base_type val_base_type = val->data_type->e.numeric.type;
+
+        /* Floating values are always cast to signed integers. */
+        if (val_base_type == HLSL_TYPE_FLOAT || val_base_type == HLSL_TYPE_HALF || val_base_type == HLSL_TYPE_DOUBLE)
+            val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_INT);
+        else
+            val_type = hlsl_get_scalar_type(ctx, lhs_type->e.numeric.type);
+    }
+    else
+    {
+        val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT);
+    }
+
     if (cmp_val && !(cmp_val = add_implicit_conversion(ctx, params->instrs, cmp_val, val_type, loc)))
         return false;
     if (!(val = add_implicit_conversion(ctx, params->instrs, val, val_type, loc)))
@@ -5516,6 +5530,12 @@ static bool intrinsic_InterlockedExchange(struct hlsl_ctx *ctx,
     return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_EXCH, params, loc, "InterlockedExchange");
 }
 
+static bool intrinsic_InterlockedMax(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_MAX, params, loc, "InterlockedMax");
+}
+
 static const struct intrinsic_function
 {
     const char *name;
@@ -5534,6 +5554,7 @@ intrinsic_functions[] =
     {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange},
     {"InterlockedCompareStore", 3, true, intrinsic_InterlockedCompareStore},
     {"InterlockedExchange", 3, true, intrinsic_InterlockedExchange},
+    {"InterlockedMax", -1, true, intrinsic_InterlockedMax},
     {"abs", 1, true, intrinsic_abs},
     {"acos", 1, true, intrinsic_acos},
     {"all", 1, true, intrinsic_all},
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index ff563e0e..c67ce835 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -10003,6 +10003,7 @@ static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
         [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD,
         [HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND,
         [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE,
+        [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX,
     };
 
     static const enum vkd3d_shader_opcode imm_opcodes[] =
@@ -10011,6 +10012,7 @@ static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
         [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND,
         [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH,
         [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH,
+        [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX,
     };
 
     struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node;
@@ -10022,6 +10024,15 @@ static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
     enum vkd3d_shader_opcode opcode;
 
     opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op];
+
+    if (value->data_type->e.numeric.type == HLSL_TYPE_INT)
+    {
+        if (opcode == VKD3DSIH_ATOMIC_UMAX)
+            opcode = VKD3DSIH_ATOMIC_IMAX;
+        else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX)
+            opcode = VKD3DSIH_IMM_ATOMIC_IMAX;
+    }
+
     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode,
             is_imm ? 2 : 1, cmp_value ? 3 : 2)))
         return false;
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index 9b66770e..b5dbd566 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -4029,6 +4029,8 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
         case VKD3DSIH_ATOMIC_AND:
         case VKD3DSIH_ATOMIC_CMP_STORE:
         case VKD3DSIH_ATOMIC_IADD:
+        case VKD3DSIH_ATOMIC_IMAX:
+        case VKD3DSIH_ATOMIC_UMAX:
         case VKD3DSIH_AND:
         case VKD3DSIH_BREAK:
         case VKD3DSIH_CASE:
@@ -4075,6 +4077,8 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
         case VKD3DSIH_IMM_ATOMIC_CMP_EXCH:
         case VKD3DSIH_IMM_ATOMIC_EXCH:
         case VKD3DSIH_IMM_ATOMIC_IADD:
+        case VKD3DSIH_IMM_ATOMIC_IMAX:
+        case VKD3DSIH_IMM_ATOMIC_UMAX:
         case VKD3DSIH_IMUL:
         case VKD3DSIH_INE:
         case VKD3DSIH_INEG: