wine-staging/patches/vkd3d-latest/0002-Updated-vkd3d-to-4289ec60a1f79f68ea9bd3624141b5657b8.patch

From df5bd15017140c87a2b2a3f5749cc5151afcd546 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Tue, 6 May 2025 06:42:46 +1000
Subject: [PATCH] Updated vkd3d to 4289ec60a1f79f68ea9bd3624141b5657b82d6c8.
---
libs/vkd3d/include/private/vkd3d_version.h | 2 +-
libs/vkd3d/libs/vkd3d-shader/dxil.c | 330 ++++++++++++--------
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 63 ++++
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 19 ++
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 70 ++++-
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 231 +++++++++++---
libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 +
7 files changed, 544 insertions(+), 174 deletions(-)
diff --git a/libs/vkd3d/include/private/vkd3d_version.h b/libs/vkd3d/include/private/vkd3d_version.h
index 795bc2dc490..fb2e2f11f8b 100644
--- a/libs/vkd3d/include/private/vkd3d_version.h
+++ b/libs/vkd3d/include/private/vkd3d_version.h
@@ -1 +1 @@
-#define VKD3D_VCS_ID " (git a4f58be0)"
+#define VKD3D_VCS_ID " (git 4289ec60)"
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index 775be85334e..52bab40b553 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -647,6 +647,7 @@ enum sm6_value_type
VALUE_TYPE_REG,
VALUE_TYPE_ICB,
VALUE_TYPE_HANDLE,
+ VALUE_TYPE_SSA,
};
struct sm6_function_data
@@ -663,6 +664,11 @@ struct sm6_handle_data
bool non_uniform;
};
+struct sm6_ssa_data
+{
+ unsigned int id;
+};
+
struct sm6_value
{
const struct sm6_type *type;
@@ -673,10 +679,11 @@ struct sm6_value
union
{
struct sm6_function_data function;
- struct vkd3d_shader_register reg;
const struct vkd3d_shader_immediate_constant_buffer *icb;
struct sm6_handle_data handle;
+ struct sm6_ssa_data ssa;
} u;
+ struct vkd3d_shader_register reg;
};
struct dxil_record
@@ -2232,7 +2239,15 @@ static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_pa
static inline bool sm6_value_is_register(const struct sm6_value *value)
{
- return value->value_type == VALUE_TYPE_REG;
+ switch (value->value_type)
+ {
+ case VALUE_TYPE_REG:
+ case VALUE_TYPE_SSA:
+ return true;
+
+ default:
+ return false;
+ }
}
static bool sm6_value_is_handle(const struct sm6_value *value)
@@ -2242,18 +2257,18 @@ static bool sm6_value_is_handle(const struct sm6_value *value)
static inline bool sm6_value_is_constant(const struct sm6_value *value)
{
- return sm6_value_is_register(value) && register_is_constant(&value->u.reg);
+ return sm6_value_is_register(value) && register_is_constant(&value->reg);
}
static bool sm6_value_is_constant_zero(const struct sm6_value *value)
{
/* Constant vectors do not occur. */
- return sm6_value_is_register(value) && register_is_scalar_constant_zero(&value->u.reg);
+ return sm6_value_is_register(value) && register_is_scalar_constant_zero(&value->reg);
}
static inline bool sm6_value_is_undef(const struct sm6_value *value)
{
- return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF;
+ return sm6_value_is_register(value) && value->reg.type == VKD3DSPR_UNDEF;
}
static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count)
@@ -2272,26 +2287,26 @@ static bool sm6_value_is_icb(const struct sm6_value *value)
static bool sm6_value_is_ssa(const struct sm6_value *value)
{
- return sm6_value_is_register(value) && register_is_ssa(&value->u.reg);
+ return sm6_value_is_register(value) && register_is_ssa(&value->reg);
}
static bool sm6_value_is_numeric_array(const struct sm6_value *value)
{
- return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg);
+ return sm6_value_is_register(value) && register_is_numeric_array(&value->reg);
}
static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value)
{
if (!sm6_value_is_constant(value))
return UINT_MAX;
- return register_get_uint_value(&value->u.reg);
+ return register_get_uint_value(&value->reg);
}
static uint64_t sm6_value_get_constant_uint64(const struct sm6_value *value)
{
if (!sm6_value_is_constant(value))
return UINT64_MAX;
- return register_get_uint64_value(&value->u.reg);
+ return register_get_uint64_value(&value->reg);
}
static unsigned int sm6_parser_alloc_ssa_id(struct sm6_parser *sm6)
@@ -2390,15 +2405,55 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type
return VKD3D_DATA_UINT;
}
+static void sm6_register_from_value(struct vkd3d_shader_register *reg, const struct sm6_value *value)
+{
+ switch (value->value_type)
+ {
+ case VALUE_TYPE_REG:
+ *reg = value->reg;
+ break;
+
+ case VALUE_TYPE_SSA:
+ register_init_with_id(reg, VKD3DSPR_SSA, vkd3d_data_type_from_sm6_type(
+ sm6_type_get_scalar_type(value->type, 0)), value->u.ssa.id);
+ reg->dimension = sm6_type_is_scalar(value->type) ? VSIR_DIMENSION_SCALAR : VSIR_DIMENSION_VEC4;
+ break;
+
+ case VALUE_TYPE_FUNCTION:
+ case VALUE_TYPE_HANDLE:
+ case VALUE_TYPE_ICB:
+ vkd3d_unreachable();
+ }
+}
+
+static void sm6_parser_init_ssa_value(struct sm6_parser *sm6, struct sm6_value *value)
+{
+ unsigned int id;
+
+ if (register_is_ssa(&value->reg) && value->reg.idx[0].offset)
+ {
+ id = value->reg.idx[0].offset;
+ TRACE("Using forward-allocated id %u.\n", id);
+ }
+ else
+ {
+ id = sm6_parser_alloc_ssa_id(sm6);
+ }
+
+ value->value_type = VALUE_TYPE_SSA;
+ value->u.ssa.id = id;
+ sm6_register_from_value(&value->reg, value);
+}
+
static void register_init_ssa_vector(struct vkd3d_shader_register *reg, const struct sm6_type *type,
unsigned int component_count, struct sm6_value *value, struct sm6_parser *sm6)
{
enum vkd3d_data_type data_type;
unsigned int id;
- if (value && register_is_ssa(&value->u.reg) && value->u.reg.idx[0].offset)
+ if (value && register_is_ssa(&value->reg) && value->reg.idx[0].offset)
{
- id = value->u.reg.idx[0].offset;
+ id = value->reg.idx[0].offset;
TRACE("Using forward-allocated id %u.\n", id);
}
else
@@ -2450,13 +2505,6 @@ static void dst_param_init_vector(struct vkd3d_shader_dst_param *param, unsigned
param->shift = 0;
}
-static void dst_param_init_ssa_scalar(struct vkd3d_shader_dst_param *param, const struct sm6_type *type,
- struct sm6_value *value, struct sm6_parser *sm6)
-{
- dst_param_init(param);
- register_init_ssa_scalar(&param->reg, type, value, sm6);
-}
-
static inline void src_param_init(struct vkd3d_shader_src_param *param)
{
param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
@@ -2480,7 +2528,7 @@ static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned
static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src)
{
src_param_init(param);
- param->reg = src->u.reg;
+ sm6_register_from_value(&param->reg, src);
}
static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param,
@@ -2547,9 +2595,9 @@ static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instructio
if (!(param = instruction_dst_params_alloc(ins, 1, sm6)))
return false;
- dst_param_init_ssa_scalar(param, dst->type, dst, sm6);
- param->write_mask = VKD3DSP_WRITEMASK_0;
- dst->u.reg = param->reg;
+ dst_param_init(param);
+ sm6_parser_init_ssa_value(sm6, dst);
+ sm6_register_from_value(&param->reg, dst);
return true;
}
@@ -2560,8 +2608,8 @@ static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instructio
struct sm6_value *dst = sm6_parser_get_current_value(sm6);
dst_param_init_vector(param, component_count);
- register_init_ssa_vector(&param->reg, sm6_type_get_scalar_type(dst->type, 0), component_count, dst, sm6);
- dst->u.reg = param->reg;
+ sm6_parser_init_ssa_value(sm6, dst);
+ sm6_register_from_value(&param->reg, dst);
}
static bool instruction_dst_param_init_temp_vector(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6)
@@ -2576,7 +2624,7 @@ static bool instruction_dst_param_init_temp_vector(struct vkd3d_shader_instructi
param->write_mask = VKD3DSP_WRITEMASK_ALL;
param->reg.idx[0].offset = 0;
param->reg.dimension = VSIR_DIMENSION_VEC4;
- dst->u.reg = param->reg;
+ dst->reg = param->reg;
return true;
}
@@ -2856,11 +2904,9 @@ static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const stru
else
{
value->type = fwd_type;
- value->value_type = VALUE_TYPE_REG;
- register_init_with_id(&value->u.reg, VKD3DSPR_SSA, vkd3d_data_type_from_sm6_type(
- sm6_type_get_scalar_type(fwd_type, 0)), sm6_parser_alloc_ssa_id(sm6));
- value->u.reg.dimension = sm6_type_is_scalar(fwd_type) ? VSIR_DIMENSION_SCALAR
- : VSIR_DIMENSION_VEC4;
+ value->value_type = VALUE_TYPE_SSA;
+ value->u.ssa.id = sm6_parser_alloc_ssa_id(sm6);
+ sm6_register_from_value(&value->reg, value);
}
}
@@ -2990,6 +3036,13 @@ static float register_get_float_value(const struct vkd3d_shader_register *reg)
return bitcast_uint_to_float(reg->u.immconst_u32[0]);
}
+static inline float sm6_value_get_constant_float(const struct sm6_value *value)
+{
+ if (!sm6_value_is_constant(value))
+ return UINT_MAX;
+ return register_get_float_value(&value->reg);
+}
+
static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type,
const uint64_t *operands, struct sm6_parser *sm6)
{
@@ -3066,6 +3119,7 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c
struct sm6_value *dst)
{
const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type;
+ struct vkd3d_shader_register reg;
struct sm6_value *operands[3];
unsigned int i, j, offset;
uint64_t value;
@@ -3109,7 +3163,9 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c
}
}
- if (operands[0]->u.reg.idx_count > 1)
+ sm6_register_from_value(&reg, operands[0]);
+
+ if (reg.idx_count > 1)
{
WARN("Unsupported stacked GEP.\n");
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
@@ -3170,10 +3226,10 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c
"Module does not define a pointer type for a constexpr GEP result.");
return VKD3D_ERROR_INVALID_SHADER;
}
- dst->u.reg = operands[0]->u.reg;
- dst->u.reg.idx[1].offset = offset;
- dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP;
- dst->u.reg.idx_count = 2;
+ dst->reg = reg;
+ dst->reg.idx[1].offset = offset;
+ dst->reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP;
+ dst->reg.idx_count = 2;
return VKD3D_OK;
}
@@ -3232,7 +3288,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
dst->type = type;
dst->value_type = VALUE_TYPE_REG;
dst->is_back_ref = true;
- vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 0);
+ vsir_register_init(&dst->reg, reg_type, reg_data_type, 0);
switch (record->code)
{
@@ -3257,9 +3313,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
value = decode_rotated_signed_value(record->operands[0]);
if (type->u.width <= 32)
- dst->u.reg.u.immconst_u32[0] = value & ((1ull << type->u.width) - 1);
+ dst->reg.u.immconst_u32[0] = value & ((1ull << type->u.width) - 1);
else
- dst->u.reg.u.immconst_u64[0] = value;
+ dst->reg.u.immconst_u64[0] = value;
break;
@@ -3274,11 +3330,11 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
}
if (type->u.width == 16)
- dst->u.reg.u.immconst_u32[0] = record->operands[0];
+ dst->reg.u.immconst_u32[0] = record->operands[0];
else if (type->u.width == 32)
- dst->u.reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]);
+ dst->reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]);
else if (type->u.width == 64)
- dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]);
+ dst->reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]);
else
vkd3d_unreachable();
@@ -3344,13 +3400,13 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
/* Resolve later in case forward refs exist. */
dst->type = type;
- dst->u.reg.type = VKD3DSPR_COUNT;
- dst->u.reg.idx[0].offset = value;
+ dst->reg.type = VKD3DSPR_COUNT;
+ dst->reg.idx[0].offset = value;
break;
case CST_CODE_UNDEF:
dxil_record_validate_operand_max_count(record, 0, sm6);
- dst->u.reg.type = VKD3DSPR_UNDEF;
+ dst->reg.type = VKD3DSPR_UNDEF;
/* Mark as explicitly undefined, not the result of a missing constant code or instruction. */
dst->is_undefined = true;
break;
@@ -3359,7 +3415,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
FIXME("Unhandled constant code %u.\n", record->code);
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
"Constant code %u is unhandled.", record->code);
- dst->u.reg.type = VKD3DSPR_UNDEF;
+ dst->reg.type = VKD3DSPR_UNDEF;
break;
}
@@ -3377,12 +3433,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
for (i = base_value_idx; i < sm6->value_count; ++i)
{
dst = &sm6->values[i];
- if (dst->u.reg.type != VKD3DSPR_COUNT)
+ if (dst->reg.type != VKD3DSPR_COUNT)
continue;
type = dst->type;
- src = &sm6->values[dst->u.reg.idx[0].offset];
+ src = &sm6->values[dst->reg.idx[0].offset];
if (!sm6_value_is_numeric_array(src))
{
WARN("Value is not an array.\n");
@@ -3393,7 +3449,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const
*dst = *src;
dst->type = type;
- dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type);
+ dst->reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type);
}
return VKD3D_OK;
@@ -3442,7 +3498,7 @@ static void sm6_parser_declare_icb(struct sm6_parser *sm6, const struct sm6_type
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER);
/* The icb value index will be resolved later so forward references can be handled. */
ins->declaration.icb = (void *)(intptr_t)init;
- register_init_with_id(&dst->u.reg, VKD3DSPR_IMMCONSTBUFFER, data_type, init);
+ register_init_with_id(&dst->reg, VKD3DSPR_IMMCONSTBUFFER, data_type, init);
}
static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const struct sm6_type *elem_type,
@@ -3464,7 +3520,7 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru
/* The initialiser value index will be resolved later so forward references can be handled. */
ins->declaration.indexable_temp.initialiser = (void *)(uintptr_t)init;
- register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx);
+ register_init_with_id(&dst->reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx);
}
static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type,
@@ -3477,7 +3533,7 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6
ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW);
dst_param_init(&ins->declaration.tgsm_raw.reg);
register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++);
- dst->u.reg = ins->declaration.tgsm_raw.reg.reg;
+ dst->reg = ins->declaration.tgsm_raw.reg.reg;
dst->structure_stride = 0;
ins->declaration.tgsm_raw.alignment = alignment;
byte_count = elem_type->u.width / 8u;
@@ -3503,7 +3559,7 @@ static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const str
dst_param_init(&ins->declaration.tgsm_structured.reg);
register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM,
data_type, sm6->tgsm_count++);
- dst->u.reg = ins->declaration.tgsm_structured.reg.reg;
+ dst->reg = ins->declaration.tgsm_structured.reg.reg;
structure_stride = elem_type->u.width / 8u;
if (structure_stride != 4)
{
@@ -3812,11 +3868,11 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6)
const struct vkd3d_shader_immediate_constant_buffer *icb;
struct sm6_value *value = &sm6->values[i];
- if (!sm6_value_is_register(value) || value->u.reg.type != VKD3DSPR_IMMCONSTBUFFER)
+ if (!sm6_value_is_register(value) || value->reg.type != VKD3DSPR_IMMCONSTBUFFER)
continue;
- if ((icb = resolve_forward_initialiser(value->u.reg.idx[0].offset, sm6)))
- value->u.reg.idx[0].offset = icb->register_idx;
+ if ((icb = resolve_forward_initialiser(value->reg.idx[0].offset, sm6)))
+ value->reg.idx[0].offset = icb->register_idx;
}
return VKD3D_OK;
@@ -4027,8 +4083,9 @@ struct function_emission_state
unsigned int temp_idx;
};
-static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs,
- unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg);
+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6,
+ const struct vkd3d_shader_register *operand_regs, unsigned int component_count,
+ struct function_emission_state *state, struct vkd3d_shader_register *reg);
static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record,
struct vkd3d_shader_instruction *ins, struct sm6_value *dst)
@@ -4136,11 +4193,11 @@ static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code)
static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record,
struct function_emission_state *state, struct sm6_value *dst)
{
- struct vkd3d_shader_register coord, const_offset, const_zero;
- const struct vkd3d_shader_register *regs[2];
struct vkd3d_shader_dst_param *dst_params;
struct vkd3d_shader_src_param *src_params;
+ struct vkd3d_shader_register regs[2], reg;
struct vkd3d_shader_instruction *ins;
+ struct vkd3d_shader_register coord;
const struct sm6_value *ptr, *src;
enum vkd3d_shader_opcode op;
unsigned int i = 0;
@@ -4152,7 +4209,9 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_
|| !sm6_value_validate_is_backward_ref(ptr, sm6))
return;
- if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM)
+ sm6_register_from_value(&reg, ptr);
+
+ if (reg.type != VKD3DSPR_GROUPSHAREDMEM)
{
WARN("Register is not groupshared.\n");
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
@@ -4187,17 +4246,11 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_
if (ptr->structure_stride)
{
- if (ptr->u.reg.idx[1].rel_addr)
- {
- regs[0] = &ptr->u.reg.idx[1].rel_addr->reg;
- }
+ if (reg.idx[1].rel_addr)
+ regs[0] = reg.idx[1].rel_addr->reg;
else
- {
- register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset);
- regs[0] = &const_offset;
- }
- register_make_constant_uint(&const_zero, 0);
- regs[1] = &const_zero;
+ register_make_constant_uint(&regs[0], reg.idx[1].offset);
+ register_make_constant_uint(&regs[1], 0);
if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord))
return;
}
@@ -4214,18 +4267,18 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_
src_param_make_constant_uint(&src_params[0], 0);
src_param_init_from_value(&src_params[1], src);
+ sm6_parser_init_ssa_value(sm6, dst);
+
dst_params = instruction_dst_params_alloc(ins, 2, sm6);
- register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6);
+ sm6_register_from_value(&dst_params[0].reg, dst);
dst_param_init(&dst_params[0]);
- dst_params[1].reg = ptr->u.reg;
+ dst_params[1].reg = reg;
dst_params[1].reg.data_type = VKD3D_DATA_UNUSED;
dst_params[1].reg.idx[1].rel_addr = NULL;
dst_params[1].reg.idx[1].offset = ~0u;
dst_params[1].reg.idx_count = 1;
dst_param_init(&dst_params[1]);
-
- dst->u.reg = dst_params[0].reg;
}
static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a,
@@ -4401,9 +4454,9 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco
dst_param_init(&dst_params[0]);
dst_param_init(&dst_params[1]);
- register_init_ssa_scalar(&dst_params[index].reg, a->type, dst, sm6);
+ sm6_parser_init_ssa_value(sm6, dst);
+ sm6_register_from_value(&dst_params[index].reg, dst);
vsir_dst_param_init_null(&dst_params[index ^ 1]);
- dst->u.reg = dst_params[index].reg;
}
else
{
@@ -4464,7 +4517,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record
dxil_record_validate_operand_max_count(record, i, sm6);
code_block->terminator.type = TERMINATOR_COND_BR;
- code_block->terminator.conditional_reg = value->u.reg;
+ sm6_register_from_value(&code_block->terminator.conditional_reg, value);
code_block->terminator.true_block = sm6_function_get_block(function, record->operands[0], sm6);
code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6);
}
@@ -4472,8 +4525,9 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record
ins->opcode = VKD3DSIH_NOP;
}
-static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs,
- unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg)
+static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6,
+ const struct vkd3d_shader_register *operand_regs, unsigned int component_count,
+ struct function_emission_state *state, struct vkd3d_shader_register *reg)
{
struct vkd3d_shader_instruction *ins = state->ins;
struct vkd3d_shader_src_param *src_params;
@@ -4483,25 +4537,25 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, cons
if (component_count == 1)
{
- *reg = *operand_regs[0];
+ *reg = operand_regs[0];
return true;
}
for (i = 0; i < component_count; ++i)
- all_constant &= register_is_constant(operand_regs[i]);
+ all_constant &= register_is_constant(&operand_regs[i]);
if (all_constant)
{
- vsir_register_init(reg, VKD3DSPR_IMMCONST, operand_regs[0]->data_type, 0);
+ vsir_register_init(reg, VKD3DSPR_IMMCONST, operand_regs[0].data_type, 0);
reg->dimension = VSIR_DIMENSION_VEC4;
for (i = 0; i < component_count; ++i)
- reg->u.immconst_u32[i] = operand_regs[i]->u.immconst_u32[0];
+ reg->u.immconst_u32[i] = operand_regs[i].u.immconst_u32[0];
for (; i < VKD3D_VEC4_SIZE; ++i)
reg->u.immconst_u32[i] = 0;
return true;
}
- register_init_with_id(reg, VKD3DSPR_TEMP, operand_regs[0]->data_type, state->temp_idx++);
+ register_init_with_id(reg, VKD3DSPR_TEMP, operand_regs[0].data_type, state->temp_idx++);
reg->dimension = VSIR_DIMENSION_VEC4;
for (i = 0; i < component_count; ++i, ++ins)
@@ -4512,7 +4566,7 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, cons
return false;
src_param_init(&src_params[0]);
- src_params[0].reg = *operand_regs[i];
+ src_params[0].reg = operand_regs[i];
if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6)))
return false;
@@ -4530,11 +4584,11 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, cons
static bool sm6_parser_emit_composite_construct(struct sm6_parser *sm6, const struct sm6_value **operands,
unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg)
{
- const struct vkd3d_shader_register *operand_regs[VKD3D_VEC4_SIZE];
+ struct vkd3d_shader_register operand_regs[VKD3D_VEC4_SIZE];
unsigned int i;
for (i = 0; i < component_count; ++i)
- operand_regs[i] = &operands[i]->u.reg;
+ sm6_register_from_value(&operand_regs[i], operands[i]);
return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg);
}
@@ -4543,19 +4597,18 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s
unsigned int max_operands, const struct sm6_value *z_operand, struct function_emission_state *state,
struct vkd3d_shader_register *reg)
{
- const struct vkd3d_shader_register *operand_regs[VKD3D_VEC4_SIZE];
+ struct vkd3d_shader_register operand_regs[VKD3D_VEC4_SIZE];
unsigned int component_count;
for (component_count = 0; component_count < max_operands; ++component_count)
{
if (!z_operand && operands[component_count]->is_undefined)
break;
- operand_regs[component_count] = &operands[component_count]->u.reg;
+ sm6_register_from_value(&operand_regs[component_count], operands[component_count]);
}
+
if (z_operand)
- {
- operand_regs[component_count++] = &z_operand->u.reg;
- }
+ sm6_register_from_value(&operand_regs[component_count++], z_operand);
return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg);
}
@@ -4780,7 +4833,7 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr
}
else
{
- reg = operands[coord_idx]->u.reg;
+ sm6_register_from_value(&reg, operands[coord_idx]);
}
for (i = coord_idx + coord_count; i < coord_idx + 3; ++i)
@@ -4810,7 +4863,7 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr
dst_param_init(&dst_params[1]);
sm6_register_from_handle(sm6, &resource->u.handle, &dst_params[1].reg);
- dst->u.reg = dst_params[0].reg;
+ dst->reg = dst_params[0].reg;
}
static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
@@ -5957,7 +6010,7 @@ static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_
dst_param_init(&dst_params[1]);
register_init_ssa_scalar(&dst_params[index].reg, dst->type, dst, sm6);
vsir_dst_param_init_null(&dst_params[index ^ 1]);
- dst->u.reg = dst_params[index].reg;
+ dst->reg = dst_params[index].reg;
}
static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
@@ -6574,7 +6627,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade
return;
type = sm6_type_get_scalar_type(dst->type, 0);
- vsir_register_init(&dst->u.reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0);
+ vsir_register_init(&dst->reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0);
/* dst->is_undefined is not set here because it flags only explicitly undefined values. */
}
@@ -6680,7 +6733,7 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_recor
"Expected a constant integer dx intrinsic function id.");
return;
}
- sm6_parser_decode_dx_op(sm6, register_get_uint_value(&op_value->u.reg),
+ sm6_parser_decode_dx_op(sm6, sm6_value_get_constant_uint(op_value),
fn_value->u.function.name, &operands[1], operand_count - 1, state, dst);
}
@@ -6826,10 +6879,10 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor
if (handler_idx == VKD3DSIH_NOP)
{
- dst->u.reg = value->u.reg;
+ sm6_register_from_value(&dst->reg, value);
/* Set the result type for casts from 16-bit min precision. */
if (type->u.width != 16)
- dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type);
+ dst->reg.data_type = vkd3d_data_type_from_sm6_type(type);
return;
}
@@ -6841,7 +6894,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor
/* bitcast */
if (handler_idx == VKD3DSIH_MOV)
- src_param->reg.data_type = dst->u.reg.data_type;
+ src_param->reg.data_type = dst->reg.data_type;
}
struct sm6_cmp_info
@@ -6994,6 +7047,7 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re
struct vkd3d_shader_dst_param *dst_params;
struct vkd3d_shader_src_param *src_params;
const struct sm6_value *ptr, *cmp, *new;
+ struct vkd3d_shader_register reg;
unsigned int i = 0;
bool is_volatile;
uint64_t code;
@@ -7003,7 +7057,9 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re
|| !sm6_value_validate_is_backward_ref(ptr, sm6))
return;
- if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM)
+ sm6_register_from_value(&reg, ptr);
+
+ if (reg.type != VKD3DSPR_GROUPSHAREDMEM)
{
WARN("Register is not groupshared.\n");
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
@@ -7063,10 +7119,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re
return;
register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6);
dst_param_init(&dst_params[0]);
- dst_params[1].reg = ptr->u.reg;
+ dst_params[1].reg = reg;
dst_param_init(&dst_params[1]);
- dst->u.reg = dst_params[0].reg;
+ dst->reg = dst_params[0].reg;
}
static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record,
@@ -7122,7 +7178,7 @@ static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil
if (!(src_param = instruction_src_params_alloc(ins, 1, sm6)))
return;
- src_param->reg = src->u.reg;
+ sm6_register_from_value(&src_param->reg, src);
src_param_init_scalar(src_param, elem_idx);
instruction_dst_param_init_ssa_scalar(ins, sm6);
@@ -7135,7 +7191,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record
unsigned int elem_idx, operand_idx = 2;
enum bitcode_address_space addr_space;
const struct sm6_value *elem_value;
- struct vkd3d_shader_register *reg;
+ struct vkd3d_shader_register reg;
const struct sm6_value *src;
bool is_in_bounds;
@@ -7149,7 +7205,9 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record
return;
}
- if (src->u.reg.idx_count > 1)
+ sm6_register_from_value(&reg, src);
+
+ if (reg.idx_count > 1)
{
WARN("Unsupported stacked GEP.\n");
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
@@ -7218,12 +7276,12 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record
return;
}
- reg = &dst->u.reg;
- *reg = src->u.reg;
- reg->idx[1].offset = 0;
- register_index_address_init(&reg->idx[1], elem_value, sm6);
- reg->idx[1].is_in_bounds = is_in_bounds;
- reg->idx_count = 2;
+ reg.idx[1].offset = 0;
+ register_index_address_init(&reg.idx[1], elem_value, sm6);
+ reg.idx[1].is_in_bounds = is_in_bounds;
+ reg.idx_count = 2;
+
+ dst->reg = reg;
dst->structure_stride = src->structure_stride;
ins->opcode = VKD3DSIH_NOP;
@@ -7235,6 +7293,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor
const struct sm6_type *elem_type = NULL, *pointee_type;
unsigned int alignment, operand_count, i = 0;
struct vkd3d_shader_src_param *src_params;
+ struct vkd3d_shader_register reg;
const struct sm6_value *ptr;
uint64_t alignment_code;
@@ -7272,25 +7331,29 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor
if (record->operands[i])
WARN("Ignoring volatile modifier.\n");
+ sm6_register_from_value(&reg, ptr);
+
if (ptr->structure_stride)
{
- VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ VKD3D_ASSERT(reg.type == VKD3DSPR_GROUPSHAREDMEM);
vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED);
if (!(src_params = instruction_src_params_alloc(ins, 3, sm6)))
return;
- if (ptr->u.reg.idx[1].rel_addr)
- src_params[0] = *ptr->u.reg.idx[1].rel_addr;
+ if (reg.idx[1].rel_addr)
+ src_params[0] = *reg.idx[1].rel_addr;
else
- src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset);
+ src_param_make_constant_uint(&src_params[0], reg.idx[1].offset);
/* Struct offset is always zero as there is no struct, just an array. */
src_param_make_constant_uint(&src_params[1], 0);
src_param_init_from_value(&src_params[2], ptr);
src_params[2].reg.alignment = alignment;
+ /* The offset is already in src_params[0]. */
+ src_params[2].reg.idx_count = 1;
}
else
{
- operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ operand_count = 1 + (reg.type == VKD3DSPR_GROUPSHAREDMEM);
vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV);
if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6)))
@@ -7341,11 +7404,11 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record
}
dst->type = type;
- register_init_ssa_scalar(&dst->u.reg, type, dst, sm6);
+ register_init_ssa_scalar(&dst->reg, type, dst, sm6);
if (!(phi = sm6_block_phi_require_space(code_block, sm6)))
return;
- phi->reg = dst->u.reg;
+ sm6_register_from_value(&phi->reg, dst);
phi->incoming_count = record->operand_count / 2u;
if (!vkd3d_array_reserve((void **)&phi->incoming, &phi->incoming_capacity, phi->incoming_count,
@@ -7421,6 +7484,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco
struct vkd3d_shader_src_param *src_params;
struct vkd3d_shader_dst_param *dst_param;
const struct sm6_value *ptr, *src;
+ struct vkd3d_shader_register reg;
uint64_t alignment_code;
if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))
@@ -7455,24 +7519,26 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco
if (record->operands[i])
WARN("Ignoring volatile modifier.\n");
+ sm6_register_from_value(&reg, ptr);
+
if (ptr->structure_stride)
{
- VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ VKD3D_ASSERT(reg.type == VKD3DSPR_GROUPSHAREDMEM);
vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED);
if (!(src_params = instruction_src_params_alloc(ins, 3, sm6)))
return;
- if (ptr->u.reg.idx[1].rel_addr)
- src_params[0] = *ptr->u.reg.idx[1].rel_addr;
+ if (reg.idx[1].rel_addr)
+ src_params[0] = *reg.idx[1].rel_addr;
else
- src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset);
+ src_param_make_constant_uint(&src_params[0], reg.idx[1].offset);
/* Struct offset is always zero as there is no struct, just an array. */
src_param_make_constant_uint(&src_params[1], 0);
src_param_init_from_value(&src_params[2], src);
}
else
{
- operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ operand_count = 1 + (reg.type == VKD3DSPR_GROUPSHAREDMEM);
vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV);
if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6)))
@@ -7484,7 +7550,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco
dst_param = instruction_dst_params_alloc(ins, 1, sm6);
dst_param_init(dst_param);
- dst_param->reg = ptr->u.reg;
+ dst_param->reg = reg;
dst_param->reg.alignment = alignment;
/* Groupshared stores contain the address in the src params. */
if (dst_param->reg.type != VKD3DSPR_IDXTEMP)
@@ -7529,7 +7595,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec
return;
}
- terminator->conditional_reg = src->u.reg;
+ sm6_register_from_value(&terminator->conditional_reg, src);
terminator->type = TERMINATOR_SWITCH;
terminator->case_count = record->operand_count / 2u;
@@ -7645,7 +7711,7 @@ static bool sm6_metadata_get_uint_value(const struct sm6_parser *sm6,
if (!sm6_type_is_integer(value->type))
return false;
- *u = register_get_uint_value(&value->u.reg);
+ *u = sm6_value_get_constant_uint(value);
return true;
}
@@ -7664,7 +7730,7 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6,
if (!sm6_type_is_integer(value->type))
return false;
- *u = register_get_uint64_value(&value->u.reg);
+ *u = sm6_value_get_constant_uint(value);
return true;
}
@@ -7683,7 +7749,7 @@ static bool sm6_metadata_get_float_value(const struct sm6_parser *sm6,
if (!sm6_type_is_floating_point(value->type))
return false;
- *f = register_get_float_value(&value->u.reg);
+ *f = sm6_value_get_constant_float(value);
return true;
}
@@ -7868,7 +7934,7 @@ static void metadata_attachment_record_apply(const struct dxil_record *record, e
}
else if (metadata_node_get_unary_uint(node, &operand, sm6))
{
- dst->u.reg.non_uniform = !!operand;
+ dst->reg.non_uniform = !!operand;
}
}
else
@@ -7940,13 +8006,13 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun
"A PHI incoming value is not a constant or SSA register.");
return VKD3D_ERROR_INVALID_SHADER;
}
- if (src->u.reg.data_type != phi->reg.data_type)
+ if (src->reg.data_type != phi->reg.data_type)
{
WARN("Type mismatch.\n");
vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH,
"The type of a phi incoming value does not match the result type.");
}
- phi->incoming[j].reg = src->u.reg;
+ sm6_register_from_value(&phi->incoming[j].reg, src);
}
}
}
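
[Editor's sketch] The dxil.c changes above move the shader register out of the value union (value->u.reg becomes value->reg) and add a dedicated VALUE_TYPE_SSA tag whose register is materialised on demand by sm6_register_from_value(). A minimal sketch of that tagged-value pattern follows; the enum and struct definitions are simplified stand-ins, not the real vkd3d types.

/* Minimal sketch of the pattern introduced above: the register no longer
 * lives in the union; values tagged VALUE_TYPE_SSA carry only an id, and a
 * register is derived from the tag when needed. Stand-in types only. */

#include <assert.h>

enum value_type { VALUE_TYPE_REG, VALUE_TYPE_SSA };
enum reg_type { REG_IMMCONST, REG_SSA };

struct shader_reg { enum reg_type type; unsigned int id; };

struct value
{
    enum value_type value_type;
    union
    {
        struct { unsigned int id; } ssa; /* valid for VALUE_TYPE_SSA */
    } u;
    struct shader_reg reg;               /* valid for VALUE_TYPE_REG */
};

static void register_from_value(struct shader_reg *reg, const struct value *value)
{
    switch (value->value_type)
    {
        case VALUE_TYPE_REG:
            *reg = value->reg;
            break;
        case VALUE_TYPE_SSA:
            reg->type = REG_SSA;
            reg->id = value->u.ssa.id;
            break;
        default:
            assert(0); /* analogous to vkd3d_unreachable() */
    }
}
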
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index a6b46474812..0f9aafbe13e 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -2291,6 +2291,26 @@ struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interl
return &interlocked->node;
}
+static struct hlsl_ir_node *hlsl_new_sync(struct hlsl_ctx *ctx,
+ uint32_t sync_flags, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_sync *sync;
+
+ if (!(sync = hlsl_alloc(ctx, sizeof(*sync))))
+ return NULL;
+
+ init_node(&sync->node, HLSL_IR_SYNC, NULL, loc);
+ sync->sync_flags = sync_flags;
+
+ return &sync->node;
+}
+
+struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ uint32_t sync_flags, const struct vkd3d_shader_location *loc)
+{
+ return append_new_instr(ctx, block, hlsl_new_sync(ctx, sync_flags, loc));
+}
+
bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
{
struct hlsl_type *type = index->val.node->data_type;
@@ -2681,6 +2701,18 @@ static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx,
return &dst->node;
}
+static struct hlsl_ir_node *clone_sync(struct hlsl_ctx *ctx, struct hlsl_ir_sync *src)
+{
+ struct hlsl_ir_sync *dst;
+
+ if (!(dst = hlsl_alloc(ctx, sizeof(*dst))))
+ return NULL;
+ init_node(&dst->node, HLSL_IR_SYNC, NULL, &src->node.loc);
+ dst->sync_flags = src->sync_flags;
+
+ return &dst->node;
+}
+
static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx,
struct clone_instr_map *map, struct hlsl_ir_compile *compile)
{
@@ -2884,6 +2916,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_INTERLOCKED:
return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr));
+ case HLSL_IR_SYNC:
+ return clone_sync(ctx, hlsl_ir_sync(instr));
+
case HLSL_IR_COMPILE:
return clone_compile(ctx, map, hlsl_ir_compile(instr));
@@ -3341,7 +3376,9 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
[HLSL_IR_STORE ] = "HLSL_IR_STORE",
[HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH",
[HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE",
+
[HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED",
+ [HLSL_IR_SYNC ] = "HLSL_IR_SYNC",
[HLSL_IR_COMPILE] = "HLSL_IR_COMPILE",
[HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE",
@@ -3831,6 +3868,19 @@ static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct
vkd3d_string_buffer_printf(buffer, ")");
}
+static void dump_ir_sync(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_sync *sync)
+{
+ vkd3d_string_buffer_printf(buffer, "sync");
+ if (sync->sync_flags & VKD3DSSF_GLOBAL_UAV)
+ vkd3d_string_buffer_printf(buffer, "_uglobal");
+ if (sync->sync_flags & VKD3DSSF_THREAD_GROUP_UAV)
+ vkd3d_string_buffer_printf(buffer, "_ugroup");
+ if (sync->sync_flags & VKD3DSSF_GROUP_SHARED_MEMORY)
+ vkd3d_string_buffer_printf(buffer, "_g");
+ if (sync->sync_flags & VKD3DSSF_THREAD_GROUP)
+ vkd3d_string_buffer_printf(buffer, "_t");
+}
+
static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
const struct hlsl_ir_compile *compile)
{
@@ -3968,6 +4018,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr));
break;
+ case HLSL_IR_SYNC:
+ dump_ir_sync(buffer, hlsl_ir_sync(instr));
+ break;
+
case HLSL_IR_COMPILE:
dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr));
break;
@@ -4205,6 +4259,11 @@ static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked)
vkd3d_free(interlocked);
}
+static void free_ir_sync(struct hlsl_ir_sync *sync)
+{
+ vkd3d_free(sync);
+}
+
static void free_ir_compile(struct hlsl_ir_compile *compile)
{
unsigned int i;
@@ -4295,6 +4354,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
free_ir_interlocked(hlsl_ir_interlocked(node));
break;
+ case HLSL_IR_SYNC:
+ free_ir_sync(hlsl_ir_sync(node));
+ break;
+
case HLSL_IR_COMPILE:
free_ir_compile(hlsl_ir_compile(node));
break;
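
[Editor's sketch] hlsl.c above threads the new HLSL_IR_SYNC node through the usual IR lifecycle: hlsl_new_sync() allocates it, hlsl_block_add_sync() appends it to a block, and clone_sync()/dump_ir_sync()/free_ir_sync() handle copying, dumping and destruction. As a usage sketch, assuming a parser context where ctx, block and loc are in scope, GroupMemoryBarrierWithGroupSync() lowers to:

/* Sketch only; the flag names are the ones used in this patch. */
if (!hlsl_block_add_sync(ctx, block,
        VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc))
    return false;

With those flags set, dump_ir_sync() prints the node as sync_g_t.
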
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index 8cb805a2e66..9eb86534f81 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -329,7 +329,9 @@ enum hlsl_ir_node_type
HLSL_IR_STORE,
HLSL_IR_SWIZZLE,
HLSL_IR_SWITCH,
+
HLSL_IR_INTERLOCKED,
+ HLSL_IR_SYNC,
HLSL_IR_COMPILE,
HLSL_IR_SAMPLER_STATE,
@@ -1006,6 +1008,15 @@ struct hlsl_ir_interlocked
struct hlsl_src coords, cmp_value, value;
};
+/* Represents a thread synchronization instruction such as GroupMemoryBarrier().*/
+struct hlsl_ir_sync
+{
+ struct hlsl_ir_node node;
+
+ /* Flags from enum vkd3d_shader_sync_flags. */
+ uint32_t sync_flags;
+};
+
struct hlsl_scope
{
/* Item entry for hlsl_ctx.scopes. */
@@ -1343,6 +1354,12 @@ static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_
return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node);
}
+static inline struct hlsl_ir_sync *hlsl_ir_sync(const struct hlsl_ir_node *node)
+{
+ VKD3D_ASSERT(node->type == HLSL_IR_SYNC);
+ return CONTAINING_RECORD(node, struct hlsl_ir_sync, node);
+}
+
static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node)
{
VKD3D_ASSERT(node->type == HLSL_IR_COMPILE);
@@ -1582,6 +1599,8 @@ void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block,
unsigned int writemask, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s,
unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ uint32_t sync_flags, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block,
unsigned int n, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
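
[Editor's sketch] hlsl_ir_sync() above follows the header's established accessor pattern: assert the node's tag, then recover the containing structure from the embedded hlsl_ir_node. A self-contained illustration of that downcast, assuming the usual offsetof-based definition of CONTAINING_RECORD (vkd3d ships its own):

#include <stddef.h>

#define CONTAINING_RECORD(address, type, field) \
        ((type *)((char *)(address) - offsetof(type, field)))

struct node { int type; };                                  /* stand-in for hlsl_ir_node */
struct sync { struct node node; unsigned int sync_flags; }; /* stand-in for hlsl_ir_sync */

/* Subtracting the field's offset turns a pointer to the embedded
 * node back into a pointer to its enclosing structure. */
static struct sync *node_to_sync(struct node *n)
{
    return CONTAINING_RECORD(n, struct sync, node);
}
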
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index 702fd30bda3..05657d27b38 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -574,13 +574,14 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
/* fall-through */
case HLSL_IR_CALL:
case HLSL_IR_IF:
+ case HLSL_IR_INTERLOCKED:
case HLSL_IR_LOOP:
case HLSL_IR_JUMP:
case HLSL_IR_RESOURCE_LOAD:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
- case HLSL_IR_INTERLOCKED:
case HLSL_IR_STATEBLOCK_CONSTANT:
+ case HLSL_IR_SYNC:
hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
"Expected literal expression.");
break;
@@ -5110,6 +5111,67 @@ static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx,
return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor");
}
+static void validate_group_barrier_profile(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc)
+{
+ if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE || hlsl_version_lt(ctx, 5, 0))
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+ "Group barriers can only be used in compute shaders 5.0 or higher.");
+ }
+}
+
+static bool intrinsic_AllMemoryBarrier(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ validate_group_barrier_profile(ctx, loc);
+ return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV
+ | VKD3DSSF_GROUP_SHARED_MEMORY, loc);
+}
+
+static bool intrinsic_AllMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ validate_group_barrier_profile(ctx, loc);
+ return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV
+ | VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc);
+}
+
+static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ if ((ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL)
+ || hlsl_version_lt(ctx, 5, 0))
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+ "DeviceMemoryBarrier() can only be used in pixel and compute shaders 5.0 or higher.");
+ }
+ return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV, loc);
+}
+
+static bool intrinsic_DeviceMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ validate_group_barrier_profile(ctx, loc);
+ return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV
+ | VKD3DSSF_THREAD_GROUP, loc);
+}
+
+static bool intrinsic_GroupMemoryBarrier(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ validate_group_barrier_profile(ctx, loc);
+ return !!hlsl_block_add_sync(ctx, params->instrs,
+ VKD3DSSF_GROUP_SHARED_MEMORY, loc);
+}
+
+static bool intrinsic_GroupMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx,
+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ validate_group_barrier_profile(ctx, loc);
+ return !!hlsl_block_add_sync(ctx, params->instrs,
+ VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc);
+}
+
static const struct intrinsic_function
{
const char *name;
@@ -5121,8 +5183,14 @@ static const struct intrinsic_function
intrinsic_functions[] =
{
/* Note: these entries should be kept in alphabetical order. */
+ {"AllMemoryBarrier", 0, true, intrinsic_AllMemoryBarrier},
+ {"AllMemoryBarrierWithGroupSync", 0, true, intrinsic_AllMemoryBarrierWithGroupSync},
{"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4},
+ {"DeviceMemoryBarrier", 0, true, intrinsic_DeviceMemoryBarrier},
+ {"DeviceMemoryBarrierWithGroupSync", 0, true, intrinsic_DeviceMemoryBarrierWithGroupSync},
{"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount},
+ {"GroupMemoryBarrier", 0, true, intrinsic_GroupMemoryBarrier},
+ {"GroupMemoryBarrierWithGroupSync", 0, true, intrinsic_GroupMemoryBarrierWithGroupSync},
{"InterlockedAdd", -1, true, intrinsic_InterlockedAdd},
{"InterlockedAnd", -1, true, intrinsic_InterlockedAnd},
{"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange},
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index bc14885af2b..9c3affda534 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -288,7 +288,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls
static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic,
- uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc)
+ uint32_t index, bool output, bool force_align, bool create, const struct vkd3d_shader_location *loc)
{
struct hlsl_semantic new_semantic;
struct hlsl_ir_var *ext_var;
@@ -311,6 +311,11 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
|| ext_var->data_type->class <= HLSL_CLASS_VECTOR);
VKD3D_ASSERT(hlsl_type_is_primitive_array(type) || type->class <= HLSL_CLASS_VECTOR);
+ vkd3d_free(new_name);
+
+ if (!create)
+ return ext_var;
+
if (output)
{
if (index >= semantic->reported_duplicated_output_next_index)
@@ -336,11 +341,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir
}
}
- vkd3d_free(new_name);
return ext_var;
}
}
+ VKD3D_ASSERT(create);
+
if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic)))
{
vkd3d_free(new_name);
@@ -429,7 +435,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK;
if (!(input = add_semantic_var(ctx, func, var, prim_type_src,
- modifiers, semantic, semantic_index + i, false, force_align, loc)))
+ modifiers, semantic, semantic_index + i, false, force_align, true, loc)))
return;
hlsl_init_simple_deref_from_var(&prim_deref, input);
@@ -442,7 +448,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
else
{
if (!(input = add_semantic_var(ctx, func, var, vector_type_src,
- modifiers, semantic, semantic_index + i, false, force_align, loc)))
+ modifiers, semantic, semantic_index + i, false, force_align, true, loc)))
return;
if (!(load = hlsl_new_var_load(ctx, input, &var->loc)))
@@ -550,9 +556,9 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function
list_move_head(&func->body.instrs, &block.instrs);
}
-static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
- struct hlsl_ir_load *rhs, uint32_t modifiers,
- struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers,
+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create)
{
struct hlsl_type *type = rhs->node.data_type, *vector_type;
struct vkd3d_shader_location *loc = &rhs->node.loc;
@@ -582,49 +588,48 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec
struct hlsl_ir_node *load;
if (!(output = add_semantic_var(ctx, func, var, vector_type,
- modifiers, semantic, semantic_index + i, true, force_align, loc)))
+ modifiers, semantic, semantic_index + i, true, force_align, create, loc)))
return;
if (type->class == HLSL_CLASS_MATRIX)
{
- c = hlsl_block_add_uint_constant(ctx, &func->body, i, &var->loc);
- load = hlsl_block_add_load_index(ctx, &func->body, &rhs->src, c, &var->loc);
+ c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc);
+ load = hlsl_block_add_load_index(ctx, block, &rhs->src, c, &var->loc);
}
else
{
VKD3D_ASSERT(i == 0);
- load = hlsl_block_add_load_index(ctx, &func->body, &rhs->src, NULL, &var->loc);
+ load = hlsl_block_add_load_index(ctx, block, &rhs->src, NULL, &var->loc);
}
- hlsl_block_add_simple_store(ctx, &func->body, output, load);
+ hlsl_block_add_simple_store(ctx, block, output, load);
}
}
-static void append_output_copy_recurse(struct hlsl_ctx *ctx,
- struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers,
- struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_function_decl *func, const struct hlsl_type *type, struct hlsl_ir_load *rhs, uint32_t modifiers,
+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create)
{
struct vkd3d_shader_location *loc = &rhs->node.loc;
- struct hlsl_type *type = rhs->node.data_type;
struct hlsl_ir_var *var = rhs->src.var;
struct hlsl_ir_node *c;
unsigned int i;
if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT)
{
- struct hlsl_ir_load *element_load;
- struct hlsl_struct_field *field;
- uint32_t elem_semantic_index;
-
for (i = 0; i < hlsl_type_element_count(type); ++i)
{
- uint32_t element_modifiers;
+ uint32_t element_modifiers, elem_semantic_index;
+ const struct hlsl_type *element_type;
+ struct hlsl_ir_load *element_load;
+ struct hlsl_struct_field *field;
if (type->class == HLSL_CLASS_ARRAY)
{
elem_semantic_index = semantic_index
+ i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
+ element_type = type->e.array.type;
element_modifiers = modifiers;
force_align = true;
}
@@ -637,23 +642,24 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx,
semantic = &field->semantic;
elem_semantic_index = semantic->index;
loc = &field->loc;
+ element_type = field->type;
element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
force_align = (i == 0);
}
- c = hlsl_block_add_uint_constant(ctx, &func->body, i, &var->loc);
+ c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc);
if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc)))
return;
- hlsl_block_add_instr(&func->body, &element_load->node);
+ hlsl_block_add_instr(block, &element_load->node);
- append_output_copy_recurse(ctx, func, element_load, element_modifiers,
- semantic, elem_semantic_index, force_align);
+ append_output_copy_recurse(ctx, block, func, element_type, element_load, element_modifiers, semantic,
+ elem_semantic_index, force_align, create);
}
}
else
{
- append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align);
+ append_output_copy(ctx, block, func, rhs, modifiers, semantic, semantic_index, force_align, create);
}
}
@@ -669,7 +675,8 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function
return;
hlsl_block_add_instr(&func->body, &load->node);
- append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false);
+ append_output_copy_recurse(ctx, &func->body, func, var->data_type, load, var->storage_modifiers,
+ &var->semantic, var->semantic.index, false, true);
}
bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
@@ -2453,6 +2460,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
case HLSL_IR_INTERLOCKED:
progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
+ break;
default:
break;
@@ -2894,6 +2902,16 @@ static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *b
++state->count;
}
+static void mark_store_groups_dirty(struct hlsl_ctx *ctx,
+ struct vectorize_stores_state *state, struct hlsl_ir_var *var)
+{
+ for (unsigned int i = 0; i < state->count; ++i)
+ {
+ if (state->groups[i].stores[0]->lhs.var == var)
+ state->groups[i].dirty = true;
+ }
+}
+
static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct vectorize_stores_state *state)
{
@@ -2907,20 +2925,21 @@ static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_blo
}
else if (instr->type == HLSL_IR_LOAD)
{
- struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var;
-
/* By vectorizing store A with store B, we are effectively moving
* store A down to happen at the same time as store B.
* If there was a load of the same variable between the two, this
* would be incorrect.
* Therefore invalidate all stores to this variable. As above, we
* could be more granular if necessary. */
-
- for (unsigned int i = 0; i < state->count; ++i)
- {
- if (state->groups[i].stores[0]->lhs.var == var)
- state->groups[i].dirty = true;
- }
+ mark_store_groups_dirty(ctx, state, hlsl_ir_load(instr)->src.var);
+ }
+ else if (instr->type == HLSL_IR_INTERLOCKED)
+ {
+ /* An interlocked operation can be used on shared memory variables,
+ * and it is at the same time both a store and a load, thus, we
+ * should also mark all stores to this variable as dirty once we
+ * find one.*/
+ mark_store_groups_dirty(ctx, state, hlsl_ir_interlocked(instr)->dst.var);
}
else if (instr->type == HLSL_IR_IF)
{
@@ -3338,6 +3357,59 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr
return true;
}
+struct stream_append_ctx
+{
+ struct hlsl_ir_function_decl *func;
+ bool created;
+};
+
+static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
+{
+ struct stream_append_ctx *append_ctx = context;
+ struct hlsl_ir_resource_store *store;
+ const struct hlsl_ir_node *rhs;
+ const struct hlsl_type *type;
+ struct hlsl_ir_var *var;
+ struct hlsl_block block;
+
+ if (instr->type != HLSL_IR_RESOURCE_STORE)
+ return false;
+
+ store = hlsl_ir_resource_store(instr);
+ if (store->store_type != HLSL_RESOURCE_STREAM_APPEND)
+ return false;
+
+ rhs = store->value.node;
+ var = store->resource.var;
+ type = hlsl_get_stream_output_type(var->data_type);
+
+ if (rhs->type != HLSL_IR_LOAD)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Stream append rhs is not HLSL_IR_LOAD. Broadcast may be missing.");
+ return false;
+ }
+
+ VKD3D_ASSERT(var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated);
+
+ if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Append to an output stream with a nonzero stream index.");
+ return false;
+ }
+
+ hlsl_block_init(&block);
+
+ append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs), var->storage_modifiers,
+ &var->semantic, var->semantic.index, false, !append_ctx->created);
+ append_ctx->created = true;
+
+ list_move_before(&instr->entry, &block.instrs);
+ hlsl_src_remove(&store->value);
+
+ return true;
+
+}
+
static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
const struct hlsl_ir_node *rhs;
@@ -5127,11 +5199,12 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
case HLSL_IR_CALL:
case HLSL_IR_IF:
+ case HLSL_IR_INTERLOCKED:
case HLSL_IR_JUMP:
case HLSL_IR_LOOP:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
- case HLSL_IR_INTERLOCKED:
+ case HLSL_IR_SYNC:
break;
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
@@ -5415,6 +5488,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
}
case HLSL_IR_CONSTANT:
case HLSL_IR_STRING_CONSTANT:
+ case HLSL_IR_SYNC:
break;
case HLSL_IR_COMPILE:
case HLSL_IR_SAMPLER_STATE:
@@ -6441,7 +6515,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
}
}
-static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
+static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
+ uint32_t *output_reg_count)
{
struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0};
struct register_allocator input_allocator = {0}, output_allocator = {0};
@@ -6478,6 +6553,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader);
}
+ *output_reg_count = output_allocator.reg_count;
+
vkd3d_free(in_prim_allocator.allocations);
vkd3d_free(patch_constant_out_patch_allocator.allocations);
vkd3d_free(input_allocator.allocations);
@@ -7641,6 +7718,42 @@ static void validate_and_record_stream_outputs(struct hlsl_ctx *ctx)
/* TODO: check that maxvertexcount * outputdatasize <= 1024. */
}
+static void validate_max_output_size(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
+ uint32_t output_reg_count)
+{
+ unsigned int max_output_size, comp_count = 0;
+ unsigned int *reg_comp_count;
+ struct hlsl_ir_var *var;
+ uint32_t id;
+
+ if (ctx->result)
+ return;
+
+ if (!(reg_comp_count = hlsl_calloc(ctx, output_reg_count, sizeof(*reg_comp_count))))
+ return;
+
+ LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ if (!var->is_output_semantic)
+ continue;
+
+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
+ id = var->regs[HLSL_REGSET_NUMERIC].id;
+ reg_comp_count[id] = max(reg_comp_count[id], vkd3d_log2i(var->regs[HLSL_REGSET_NUMERIC].writemask) + 1);
+ }
+
+ for (id = 0; id < output_reg_count; ++id)
+ comp_count += reg_comp_count[id];
+
+ max_output_size = ctx->max_vertex_count * comp_count;
+ if (max_output_size > 1024)
+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT,
+ "Max vertex count (%u) * output data component count (%u) = %u, which is greater than 1024.",
+ ctx->max_vertex_count, comp_count, max_output_size);
+
+ vkd3d_free(reg_comp_count);
+}
+
static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body)
{
struct hlsl_ir_node *instr, *next;
@@ -10718,8 +10831,20 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx,
if (store->store_type != HLSL_RESOURCE_STORE)
{
- hlsl_fixme(ctx, &instr->loc, "Stream output operations.");
- return false;
+ enum vkd3d_shader_opcode opcode = store->store_type == HLSL_RESOURCE_STREAM_APPEND
+ ? VKD3DSIH_EMIT : VKD3DSIH_CUT;
+
+ VKD3D_ASSERT(!store->value.node && !store->coords.node);
+ VKD3D_ASSERT(store->resource.var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated);
+
+ if (store->resource.var->regs[HLSL_REGSET_STREAM_OUTPUTS].index)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Stream output operation with a nonzero stream index.");
+ return false;
+ }
+
+ ins = generate_vsir_add_program_instruction(ctx, program, &store->node.loc, opcode, 0, 0);
+ return !!ins;
}
if (!store->resource.var->is_uniform)
@@ -11264,6 +11389,19 @@ static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
}
}
+static bool sm4_generate_vsir_instr_sync(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_sync *sync)
+{
+ const struct hlsl_ir_node *instr = &sync->node;
+ struct vkd3d_shader_instruction *ins;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SYNC, 0, 0)))
+ return false;
+ ins->flags = sync->sync_flags;
+
+ return true;
+}
+
static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program);
static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff)
@@ -11414,6 +11552,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr));
break;
+ case HLSL_IR_SYNC:
+ sm4_generate_vsir_instr_sync(ctx, program, hlsl_ir_sync(instr));
+ break;
+
default:
break;
}
@@ -13236,6 +13378,8 @@ static void process_entry_function(struct hlsl_ctx *ctx,
struct hlsl_block static_initializers, global_uniforms;
struct hlsl_block *const body = &entry_func->body;
struct recursive_call_ctx recursive_call_ctx;
+ struct stream_append_ctx stream_append_ctx;
+ uint32_t output_reg_count;
struct hlsl_ir_var *var;
unsigned int i;
bool progress;
@@ -13461,6 +13605,10 @@ static void process_entry_function(struct hlsl_ctx *ctx,
{
allocate_stream_outputs(ctx);
validate_and_record_stream_outputs(ctx);
+
+ memset(&stream_append_ctx, 0, sizeof(stream_append_ctx));
+ stream_append_ctx.func = entry_func;
+ hlsl_transform_ir(ctx, lower_stream_appends, body, &stream_append_ctx);
}
if (profile->major_version < 4)
@@ -13519,7 +13667,10 @@ static void process_entry_function(struct hlsl_ctx *ctx,
allocate_register_reservations(ctx, &ctx->extern_vars);
allocate_register_reservations(ctx, &entry_func->extern_vars);
- allocate_semantic_registers(ctx, entry_func);
+ allocate_semantic_registers(ctx, entry_func, &output_reg_count);
+
+ if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
+ validate_max_output_size(ctx, entry_func, output_reg_count);
}
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
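
[Editor's note] The new validate_max_output_size() check above can be traced with a hypothetical example. Suppose a geometry shader declared with [maxvertexcount(200)] allocates two output registers: a float4 (writemask 0xf, so vkd3d_log2i(0xf) + 1 = 4 components) and a float3 (writemask 0x7, 3 components). Then comp_count = 4 + 3 = 7 and max_output_size = 200 * 7 = 1400, which exceeds the 1024 limit and raises VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT; with [maxvertexcount(146)] the product is 1022 and the shader passes.
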
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index c29bedfaaa9..29b03871e05 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -4165,6 +4165,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_BREAK:
case VKD3DSIH_CASE:
case VKD3DSIH_CONTINUE:
+ case VKD3DSIH_CUT:
case VKD3DSIH_DEFAULT:
case VKD3DSIH_DISCARD:
case VKD3DSIH_DIV:
@@ -4178,6 +4179,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_DSY_COARSE:
case VKD3DSIH_DSY_FINE:
case VKD3DSIH_ELSE:
+ case VKD3DSIH_EMIT:
case VKD3DSIH_ENDIF:
case VKD3DSIH_ENDLOOP:
case VKD3DSIH_ENDSWITCH:
@@ -4213,6 +4215,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_IMM_ATOMIC_UMIN:
case VKD3DSIH_IMM_ATOMIC_OR:
case VKD3DSIH_IMM_ATOMIC_XOR:
+ case VKD3DSIH_SYNC:
case VKD3DSIH_IMUL:
case VKD3DSIH_INE:
case VKD3DSIH_INEG:
--
2.47.2