vkd3d-shader/hlsl: Parse barriers.

And introduce hlsl_ir_sync to represent them.
This commit is contained in:
Francisco Casas
2025-04-16 00:30:01 +00:00
committed by Henri Verbeet
parent 541060215e
commit 758a4bef09
Notes: Henri Verbeet 2025-05-05 15:28:08 +02:00
Approved-by: Conor McCarthy (@cmccarthy)
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1470
6 changed files with 175 additions and 2 deletions

View File

@@ -64,6 +64,7 @@ vkd3d_shader_tests = \
tests/hlsl/asint.shader_test \
tests/hlsl/asuint.shader_test \
tests/hlsl/attributes.shader_test \
tests/hlsl/barriers.shader_test \
tests/hlsl/bitwise-assignment.shader_test \
tests/hlsl/bitwise.shader_test \
tests/hlsl/bool-cast.shader_test \

View File

@@ -2291,6 +2291,26 @@ struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interl
return &interlocked->node;
}
/* Allocate a new synchronisation (barrier) node carrying the given flags
 * from enum vkd3d_shader_sync_flags. Returns NULL on allocation failure. */
static struct hlsl_ir_node *hlsl_new_sync(struct hlsl_ctx *ctx,
        uint32_t sync_flags, const struct vkd3d_shader_location *loc)
{
    struct hlsl_ir_sync *s = hlsl_alloc(ctx, sizeof(*s));

    if (!s)
        return NULL;

    init_node(&s->node, HLSL_IR_SYNC, NULL, loc);
    s->sync_flags = sync_flags;
    return &s->node;
}
/* Create a sync node and append it to @block, recording any allocation
 * failure in @ctx. Returns the new node, or NULL on failure. */
struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block,
        uint32_t sync_flags, const struct vkd3d_shader_location *loc)
{
    struct hlsl_ir_node *sync = hlsl_new_sync(ctx, sync_flags, loc);

    return append_new_instr(ctx, block, sync);
}
bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
{
struct hlsl_type *type = index->val.node->data_type;
@@ -2681,6 +2701,18 @@ static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx,
return &dst->node;
}
/* Deep-copy a sync node; it has no sources, so only the type, location
 * and flags need to be reproduced. Returns NULL on allocation failure. */
static struct hlsl_ir_node *clone_sync(struct hlsl_ctx *ctx, struct hlsl_ir_sync *src)
{
    struct hlsl_ir_sync *copy = hlsl_alloc(ctx, sizeof(*copy));

    if (!copy)
        return NULL;

    init_node(&copy->node, HLSL_IR_SYNC, NULL, &src->node.loc);
    copy->sync_flags = src->sync_flags;
    return &copy->node;
}
static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx,
struct clone_instr_map *map, struct hlsl_ir_compile *compile)
{
@@ -2884,6 +2916,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_INTERLOCKED:
return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr));
case HLSL_IR_SYNC:
return clone_sync(ctx, hlsl_ir_sync(instr));
case HLSL_IR_COMPILE:
return clone_compile(ctx, map, hlsl_ir_compile(instr));
@@ -3341,7 +3376,9 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
[HLSL_IR_STORE ] = "HLSL_IR_STORE",
[HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH",
[HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE",
[HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED",
[HLSL_IR_SYNC ] = "HLSL_IR_SYNC",
[HLSL_IR_COMPILE] = "HLSL_IR_COMPILE",
[HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE",
@@ -3831,6 +3868,19 @@ static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct
vkd3d_string_buffer_printf(buffer, ")");
}
static void dump_ir_sync(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_sync *sync)
{
vkd3d_string_buffer_printf(buffer, "sync");
if (sync->sync_flags & VKD3DSSF_GLOBAL_UAV)
vkd3d_string_buffer_printf(buffer, "_uglobal");
if (sync->sync_flags & VKD3DSSF_THREAD_GROUP_UAV)
vkd3d_string_buffer_printf(buffer, "_ugroup");
if (sync->sync_flags & VKD3DSSF_GROUP_SHARED_MEMORY)
vkd3d_string_buffer_printf(buffer, "_g");
if (sync->sync_flags & VKD3DSSF_THREAD_GROUP)
vkd3d_string_buffer_printf(buffer, "_t");
}
static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
const struct hlsl_ir_compile *compile)
{
@@ -3968,6 +4018,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr));
break;
case HLSL_IR_SYNC:
dump_ir_sync(buffer, hlsl_ir_sync(instr));
break;
case HLSL_IR_COMPILE:
dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr));
break;
@@ -4205,6 +4259,11 @@ static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked)
vkd3d_free(interlocked);
}
/* Free a sync node. It owns no sources or auxiliary allocations, so
 * releasing the node itself is sufficient. */
static void free_ir_sync(struct hlsl_ir_sync *sync)
{
    vkd3d_free(sync);
}
static void free_ir_compile(struct hlsl_ir_compile *compile)
{
unsigned int i;
@@ -4295,6 +4354,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
free_ir_interlocked(hlsl_ir_interlocked(node));
break;
case HLSL_IR_SYNC:
free_ir_sync(hlsl_ir_sync(node));
break;
case HLSL_IR_COMPILE:
free_ir_compile(hlsl_ir_compile(node));
break;

View File

@@ -329,7 +329,9 @@ enum hlsl_ir_node_type
HLSL_IR_STORE,
HLSL_IR_SWIZZLE,
HLSL_IR_SWITCH,
HLSL_IR_INTERLOCKED,
HLSL_IR_SYNC,
HLSL_IR_COMPILE,
HLSL_IR_SAMPLER_STATE,
@@ -1006,6 +1008,15 @@ struct hlsl_ir_interlocked
struct hlsl_src coords, cmp_value, value;
};
/* Represents a thread synchronization instruction such as GroupMemoryBarrier(). */
struct hlsl_ir_sync
{
    struct hlsl_ir_node node;
    /* Flags from enum vkd3d_shader_sync_flags, selecting which memory scopes
     * are fenced and whether execution is also synchronized across the group. */
    uint32_t sync_flags;
};
struct hlsl_scope
{
/* Item entry for hlsl_ctx.scopes. */
@@ -1343,6 +1354,12 @@ static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_
return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node);
}
/* Downcast a generic IR node to its containing hlsl_ir_sync;
 * asserts that the node really is of type HLSL_IR_SYNC. */
static inline struct hlsl_ir_sync *hlsl_ir_sync(const struct hlsl_ir_node *node)
{
    VKD3D_ASSERT(node->type == HLSL_IR_SYNC);
    return CONTAINING_RECORD(node, struct hlsl_ir_sync, node);
}
static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node)
{
VKD3D_ASSERT(node->type == HLSL_IR_COMPILE);
@@ -1582,6 +1599,8 @@ void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block,
unsigned int writemask, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s,
unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block,
uint32_t sync_flags, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block,
unsigned int n, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,

View File

@@ -574,13 +574,14 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
/* fall-through */
case HLSL_IR_CALL:
case HLSL_IR_IF:
case HLSL_IR_INTERLOCKED:
case HLSL_IR_LOOP:
case HLSL_IR_JUMP:
case HLSL_IR_RESOURCE_LOAD:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
case HLSL_IR_INTERLOCKED:
case HLSL_IR_STATEBLOCK_CONSTANT:
case HLSL_IR_SYNC:
hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
"Expected literal expression.");
break;
@@ -5110,6 +5111,67 @@ static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx,
return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor");
}
/* Emit an hlsl_error unless the current target is a compute shader of
 * model 5.0 or newer, the only profiles where group barriers are valid. */
static void validate_group_barrier_profile(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc)
{
    if (ctx->profile->type == VKD3D_SHADER_TYPE_COMPUTE && !hlsl_version_lt(ctx, 5, 0))
        return;

    hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
            "Group barriers can only be used in compute shaders 5.0 or higher.");
}
static bool intrinsic_AllMemoryBarrier(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
validate_group_barrier_profile(ctx, loc);
return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV
| VKD3DSSF_GROUP_SHARED_MEMORY, loc);
}
static bool intrinsic_AllMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
validate_group_barrier_profile(ctx, loc);
return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV
| VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc);
}
static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
if ((ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL)
|| hlsl_version_lt(ctx, 5, 0))
{
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
"DeviceMemoryBarrier() can only be used in pixel and compute shaders 5.0 or higher.");
}
return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV, loc);
}
static bool intrinsic_DeviceMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
validate_group_barrier_profile(ctx, loc);
return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV
| VKD3DSSF_THREAD_GROUP, loc);
}
static bool intrinsic_GroupMemoryBarrier(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
validate_group_barrier_profile(ctx, loc);
return !!hlsl_block_add_sync(ctx, params->instrs,
VKD3DSSF_GROUP_SHARED_MEMORY, loc);
}
static bool intrinsic_GroupMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
validate_group_barrier_profile(ctx, loc);
return !!hlsl_block_add_sync(ctx, params->instrs,
VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc);
}
static const struct intrinsic_function
{
const char *name;
@@ -5121,8 +5183,14 @@ static const struct intrinsic_function
intrinsic_functions[] =
{
/* Note: these entries should be kept in alphabetical order. */
{"AllMemoryBarrier", 0, true, intrinsic_AllMemoryBarrier},
{"AllMemoryBarrierWithGroupSync", 0, true, intrinsic_AllMemoryBarrierWithGroupSync},
{"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4},
{"DeviceMemoryBarrier", 0, true, intrinsic_DeviceMemoryBarrier},
{"DeviceMemoryBarrierWithGroupSync", 0, true, intrinsic_DeviceMemoryBarrierWithGroupSync},
{"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount},
{"GroupMemoryBarrier", 0, true, intrinsic_GroupMemoryBarrier},
{"GroupMemoryBarrierWithGroupSync", 0, true, intrinsic_GroupMemoryBarrierWithGroupSync},
{"InterlockedAdd", -1, true, intrinsic_InterlockedAdd},
{"InterlockedAnd", -1, true, intrinsic_InterlockedAnd},
{"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange},

View File

@@ -2453,6 +2453,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
case HLSL_IR_INTERLOCKED:
progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
break;
default:
break;
@@ -5127,11 +5128,12 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
case HLSL_IR_CALL:
case HLSL_IR_IF:
case HLSL_IR_INTERLOCKED:
case HLSL_IR_JUMP:
case HLSL_IR_LOOP:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
case HLSL_IR_INTERLOCKED:
case HLSL_IR_SYNC:
break;
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
@@ -5415,6 +5417,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
}
case HLSL_IR_CONSTANT:
case HLSL_IR_STRING_CONSTANT:
case HLSL_IR_SYNC:
break;
case HLSL_IR_COMPILE:
case HLSL_IR_SAMPLER_STATE:
@@ -11414,6 +11417,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr));
break;
case HLSL_IR_SYNC:
hlsl_fixme(ctx, &instr->loc, "Sync instructions.");
break;
default:
break;
}

View File

@@ -0,0 +1,15 @@
[require]
shader model >= 5.0
% This is just a parsing test.
[compute shader todo]
[numthreads(1024, 1, 1)]
void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
{
GroupMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
AllMemoryBarrier();
AllMemoryBarrierWithGroupSync();
}