From 758a4bef090bbbda185d6a00e48d2b4005be31e6 Mon Sep 17 00:00:00 2001 From: Francisco Casas Date: Wed, 16 Apr 2025 00:30:01 +0000 Subject: [PATCH] vkd3d-shader/hlsl: Parse barriers. And introduce hlsl_ir_sync to represent them. --- Makefile.am | 1 + libs/vkd3d-shader/hlsl.c | 63 ++++++++++++++++++++++++++++ libs/vkd3d-shader/hlsl.h | 19 +++++++++ libs/vkd3d-shader/hlsl.y | 70 +++++++++++++++++++++++++++++++- libs/vkd3d-shader/hlsl_codegen.c | 9 +++- tests/hlsl/barriers.shader_test | 15 +++++++ 6 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 tests/hlsl/barriers.shader_test diff --git a/Makefile.am b/Makefile.am index f93ebc447..115c5a3e7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -64,6 +64,7 @@ vkd3d_shader_tests = \ tests/hlsl/asint.shader_test \ tests/hlsl/asuint.shader_test \ tests/hlsl/attributes.shader_test \ + tests/hlsl/barriers.shader_test \ tests/hlsl/bitwise-assignment.shader_test \ tests/hlsl/bitwise.shader_test \ tests/hlsl/bool-cast.shader_test \ diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c index a6b464748..0f9aafbe1 100644 --- a/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d-shader/hlsl.c @@ -2291,6 +2291,26 @@ struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interl return &interlocked->node; } +static struct hlsl_ir_node *hlsl_new_sync(struct hlsl_ctx *ctx, + uint32_t sync_flags, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_sync *sync; + + if (!(sync = hlsl_alloc(ctx, sizeof(*sync)))) + return NULL; + + init_node(&sync->node, HLSL_IR_SYNC, NULL, loc); + sync->sync_flags = sync_flags; + + return &sync->node; +} + +struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block, + uint32_t sync_flags, const struct vkd3d_shader_location *loc) +{ + return append_new_instr(ctx, block, hlsl_new_sync(ctx, sync_flags, loc)); +} + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) { struct hlsl_type *type = index->val.node->data_type; @@ -2681,6 
+2701,18 @@ static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx, return &dst->node; } +static struct hlsl_ir_node *clone_sync(struct hlsl_ctx *ctx, struct hlsl_ir_sync *src) +{ + struct hlsl_ir_sync *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_SYNC, NULL, &src->node.loc); + dst->sync_flags = src->sync_flags; + + return &dst->node; +} + static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_compile *compile) { @@ -2884,6 +2916,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_INTERLOCKED: return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr)); + case HLSL_IR_SYNC: + return clone_sync(ctx, hlsl_ir_sync(instr)); + case HLSL_IR_COMPILE: return clone_compile(ctx, map, hlsl_ir_compile(instr)); @@ -3341,7 +3376,9 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_STORE ] = "HLSL_IR_STORE", [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED", + [HLSL_IR_SYNC ] = "HLSL_IR_SYNC", [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", @@ -3831,6 +3868,19 @@ static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct vkd3d_string_buffer_printf(buffer, ")"); } +static void dump_ir_sync(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_sync *sync) +{ + vkd3d_string_buffer_printf(buffer, "sync"); + if (sync->sync_flags & VKD3DSSF_GLOBAL_UAV) + vkd3d_string_buffer_printf(buffer, "_uglobal"); + if (sync->sync_flags & VKD3DSSF_THREAD_GROUP_UAV) + vkd3d_string_buffer_printf(buffer, "_ugroup"); + if (sync->sync_flags & VKD3DSSF_GROUP_SHARED_MEMORY) + vkd3d_string_buffer_printf(buffer, "_g"); + if (sync->sync_flags & VKD3DSSF_THREAD_GROUP) + vkd3d_string_buffer_printf(buffer, "_t"); +} + static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer 
*buffer, const struct hlsl_ir_compile *compile) { @@ -3968,6 +4018,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr)); break; + case HLSL_IR_SYNC: + dump_ir_sync(buffer, hlsl_ir_sync(instr)); + break; + case HLSL_IR_COMPILE: dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); break; @@ -4205,6 +4259,11 @@ static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked) vkd3d_free(interlocked); } +static void free_ir_sync(struct hlsl_ir_sync *sync) +{ + vkd3d_free(sync); +} + static void free_ir_compile(struct hlsl_ir_compile *compile) { unsigned int i; @@ -4295,6 +4354,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_interlocked(hlsl_ir_interlocked(node)); break; + case HLSL_IR_SYNC: + free_ir_sync(hlsl_ir_sync(node)); + break; + case HLSL_IR_COMPILE: free_ir_compile(hlsl_ir_compile(node)); break; diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index e918c9034..c9ee90d4e 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -329,7 +329,9 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + HLSL_IR_INTERLOCKED, + HLSL_IR_SYNC, HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, @@ -1006,6 +1008,15 @@ struct hlsl_ir_interlocked struct hlsl_src coords, cmp_value, value; }; +/* Represents a thread synchronization instruction such as GroupMemoryBarrier(). */ +struct hlsl_ir_sync +{ + struct hlsl_ir_node node; + + /* Flags from enum vkd3d_shader_sync_flags. */ + uint32_t sync_flags; +}; + struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. 
*/ @@ -1343,6 +1354,12 @@ static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node); } +static inline struct hlsl_ir_sync *hlsl_ir_sync(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_SYNC); + return CONTAINING_RECORD(node, struct hlsl_ir_sync, node); +} + static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); @@ -1582,6 +1599,8 @@ void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, unsigned int writemask, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block, + uint32_t sync_flags, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index 702fd30bd..05657d27b 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -574,13 +574,14 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx /* fall-through */ case HLSL_IR_CALL: case HLSL_IR_IF: + case HLSL_IR_INTERLOCKED: case HLSL_IR_LOOP: case HLSL_IR_JUMP: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: - case HLSL_IR_INTERLOCKED: case HLSL_IR_STATEBLOCK_CONSTANT: + case HLSL_IR_SYNC: hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; @@ -5110,6 +5111,67 @@ static bool intrinsic_InterlockedXor(struct 
hlsl_ctx *ctx, return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); } +static void validate_group_barrier_profile(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +{ + if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE || hlsl_version_lt(ctx, 5, 0)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "Group barriers can only be used in compute shaders 5.0 or higher."); + } +} + +static bool intrinsic_AllMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV + | VKD3DSSF_GROUP_SHARED_MEMORY, loc); +} + +static bool intrinsic_AllMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV + | VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); +} + +static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + if ((ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) + || hlsl_version_lt(ctx, 5, 0)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "DeviceMemoryBarrier() can only be used in pixel and compute shaders 5.0 or higher."); + } + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV, loc); +} + +static bool intrinsic_DeviceMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV + | VKD3DSSF_THREAD_GROUP, loc); +} + +static bool 
intrinsic_GroupMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, + VKD3DSSF_GROUP_SHARED_MEMORY, loc); +} + +static bool intrinsic_GroupMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, + VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); +} + static const struct intrinsic_function { const char *name; @@ -5121,8 +5183,14 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ + {"AllMemoryBarrier", 0, true, intrinsic_AllMemoryBarrier}, + {"AllMemoryBarrierWithGroupSync", 0, true, intrinsic_AllMemoryBarrierWithGroupSync}, {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"DeviceMemoryBarrier", 0, true, intrinsic_DeviceMemoryBarrier}, + {"DeviceMemoryBarrierWithGroupSync", 0, true, intrinsic_DeviceMemoryBarrierWithGroupSync}, {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, + {"GroupMemoryBarrier", 0, true, intrinsic_GroupMemoryBarrier}, + {"GroupMemoryBarrierWithGroupSync", 0, true, intrinsic_GroupMemoryBarrierWithGroupSync}, {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd}, {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd}, {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange}, diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 8fb23a4b2..e7992844d 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -2453,6 +2453,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_INTERLOCKED: progress |= copy_propagation_transform_interlocked(ctx, 
hlsl_ir_interlocked(instr), state); + break; default: break; @@ -5127,11 +5128,12 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_CALL: case HLSL_IR_IF: + case HLSL_IR_INTERLOCKED: case HLSL_IR_JUMP: case HLSL_IR_LOOP: case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: - case HLSL_IR_INTERLOCKED: + case HLSL_IR_SYNC: break; case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. */ @@ -5415,6 +5417,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop } case HLSL_IR_CONSTANT: case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SYNC: break; case HLSL_IR_COMPILE: case HLSL_IR_SAMPLER_STATE: @@ -11414,6 +11417,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr)); break; + case HLSL_IR_SYNC: + hlsl_fixme(ctx, &instr->loc, "Sync instructions."); + break; + default: break; } diff --git a/tests/hlsl/barriers.shader_test b/tests/hlsl/barriers.shader_test new file mode 100644 index 000000000..98f0fea5b --- /dev/null +++ b/tests/hlsl/barriers.shader_test @@ -0,0 +1,15 @@ +[require] +shader model >= 5.0 + +% This is just a parsing test. +[compute shader todo] + [numthreads(1024, 1, 1)] +void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID) +{ + GroupMemoryBarrier(); + GroupMemoryBarrierWithGroupSync(); + DeviceMemoryBarrier(); + DeviceMemoryBarrierWithGroupSync(); + AllMemoryBarrier(); + AllMemoryBarrierWithGroupSync(); +}