From df5bd15017140c87a2b2a3f5749cc5151afcd546 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 6 May 2025 06:42:46 +1000 Subject: [PATCH] Updated vkd3d to 4289ec60a1f79f68ea9bd3624141b5657b82d6c8. --- libs/vkd3d/include/private/vkd3d_version.h | 2 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 330 ++++++++++++-------- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 63 ++++ libs/vkd3d/libs/vkd3d-shader/hlsl.h | 19 ++ libs/vkd3d/libs/vkd3d-shader/hlsl.y | 70 ++++- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 231 +++++++++++--- libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 + 7 files changed, 544 insertions(+), 174 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_version.h b/libs/vkd3d/include/private/vkd3d_version.h index 795bc2dc490..fb2e2f11f8b 100644 --- a/libs/vkd3d/include/private/vkd3d_version.h +++ b/libs/vkd3d/include/private/vkd3d_version.h @@ -1 +1 @@ -#define VKD3D_VCS_ID " (git a4f58be0)" +#define VKD3D_VCS_ID " (git 4289ec60)" diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 775be85334e..52bab40b553 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -647,6 +647,7 @@ enum sm6_value_type VALUE_TYPE_REG, VALUE_TYPE_ICB, VALUE_TYPE_HANDLE, + VALUE_TYPE_SSA, }; struct sm6_function_data @@ -663,6 +664,11 @@ struct sm6_handle_data bool non_uniform; }; +struct sm6_ssa_data +{ + unsigned int id; +}; + struct sm6_value { const struct sm6_type *type; @@ -673,10 +679,11 @@ struct sm6_value union { struct sm6_function_data function; - struct vkd3d_shader_register reg; const struct vkd3d_shader_immediate_constant_buffer *icb; struct sm6_handle_data handle; + struct sm6_ssa_data ssa; } u; + struct vkd3d_shader_register reg; }; struct dxil_record @@ -2232,7 +2239,15 @@ static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_pa static inline bool sm6_value_is_register(const struct sm6_value *value) { - return value->value_type == VALUE_TYPE_REG; + switch (value->value_type) + { + case VALUE_TYPE_REG: + case VALUE_TYPE_SSA: + return true; + + default: + return false; + } } static bool sm6_value_is_handle(const struct sm6_value *value) @@ -2242,18 +2257,18 @@ static bool sm6_value_is_handle(const struct sm6_value *value) static inline bool sm6_value_is_constant(const struct sm6_value *value) { - return sm6_value_is_register(value) && register_is_constant(&value->u.reg); + return sm6_value_is_register(value) && register_is_constant(&value->reg); } static bool sm6_value_is_constant_zero(const struct sm6_value *value) { /* Constant vectors do not occur. 
*/ - return sm6_value_is_register(value) && register_is_scalar_constant_zero(&value->u.reg); + return sm6_value_is_register(value) && register_is_scalar_constant_zero(&value->reg); } static inline bool sm6_value_is_undef(const struct sm6_value *value) { - return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; + return sm6_value_is_register(value) && value->reg.type == VKD3DSPR_UNDEF; } static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) @@ -2272,26 +2287,26 @@ static bool sm6_value_is_icb(const struct sm6_value *value) static bool sm6_value_is_ssa(const struct sm6_value *value) { - return sm6_value_is_register(value) && register_is_ssa(&value->u.reg); + return sm6_value_is_register(value) && register_is_ssa(&value->reg); } static bool sm6_value_is_numeric_array(const struct sm6_value *value) { - return sm6_value_is_register(value) && register_is_numeric_array(&value->u.reg); + return sm6_value_is_register(value) && register_is_numeric_array(&value->reg); } static inline unsigned int sm6_value_get_constant_uint(const struct sm6_value *value) { if (!sm6_value_is_constant(value)) return UINT_MAX; - return register_get_uint_value(&value->u.reg); + return register_get_uint_value(&value->reg); } static uint64_t sm6_value_get_constant_uint64(const struct sm6_value *value) { if (!sm6_value_is_constant(value)) return UINT64_MAX; - return register_get_uint64_value(&value->u.reg); + return register_get_uint64_value(&value->reg); } static unsigned int sm6_parser_alloc_ssa_id(struct sm6_parser *sm6) @@ -2390,15 +2405,55 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_UINT; } +static void sm6_register_from_value(struct vkd3d_shader_register *reg, const struct sm6_value *value) +{ + switch (value->value_type) + { + case VALUE_TYPE_REG: + *reg = value->reg; + break; + + case VALUE_TYPE_SSA: + register_init_with_id(reg, VKD3DSPR_SSA, vkd3d_data_type_from_sm6_type( + sm6_type_get_scalar_type(value->type, 0)), value->u.ssa.id); + reg->dimension = sm6_type_is_scalar(value->type) ? 
VSIR_DIMENSION_SCALAR : VSIR_DIMENSION_VEC4; + break; + + case VALUE_TYPE_FUNCTION: + case VALUE_TYPE_HANDLE: + case VALUE_TYPE_ICB: + vkd3d_unreachable(); + } +} + +static void sm6_parser_init_ssa_value(struct sm6_parser *sm6, struct sm6_value *value) +{ + unsigned int id; + + if (register_is_ssa(&value->reg) && value->reg.idx[0].offset) + { + id = value->reg.idx[0].offset; + TRACE("Using forward-allocated id %u.\n", id); + } + else + { + id = sm6_parser_alloc_ssa_id(sm6); + } + + value->value_type = VALUE_TYPE_SSA; + value->u.ssa.id = id; + sm6_register_from_value(&value->reg, value); +} + static void register_init_ssa_vector(struct vkd3d_shader_register *reg, const struct sm6_type *type, unsigned int component_count, struct sm6_value *value, struct sm6_parser *sm6) { enum vkd3d_data_type data_type; unsigned int id; - if (value && register_is_ssa(&value->u.reg) && value->u.reg.idx[0].offset) + if (value && register_is_ssa(&value->reg) && value->reg.idx[0].offset) { - id = value->u.reg.idx[0].offset; + id = value->reg.idx[0].offset; TRACE("Using forward-allocated id %u.\n", id); } else @@ -2450,13 +2505,6 @@ static void dst_param_init_vector(struct vkd3d_shader_dst_param *param, unsigned param->shift = 0; } -static void dst_param_init_ssa_scalar(struct vkd3d_shader_dst_param *param, const struct sm6_type *type, - struct sm6_value *value, struct sm6_parser *sm6) -{ - dst_param_init(param); - register_init_ssa_scalar(¶m->reg, type, value, sm6); -} - static inline void src_param_init(struct vkd3d_shader_src_param *param) { param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); @@ -2480,7 +2528,7 @@ static void src_param_init_vector(struct vkd3d_shader_src_param *param, unsigned static void src_param_init_from_value(struct vkd3d_shader_src_param *param, const struct sm6_value *src) { src_param_init(param); - param->reg = src->u.reg; + sm6_register_from_value(¶m->reg, src); } static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param, @@ -2547,9 +2595,9 @@ static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instructio if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) return false; - dst_param_init_ssa_scalar(param, dst->type, dst, sm6); - param->write_mask = VKD3DSP_WRITEMASK_0; - dst->u.reg = param->reg; + dst_param_init(param); + sm6_parser_init_ssa_value(sm6, dst); + sm6_register_from_value(¶m->reg, dst); return true; } @@ -2560,8 +2608,8 @@ static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instructio struct sm6_value *dst = sm6_parser_get_current_value(sm6); dst_param_init_vector(param, component_count); - register_init_ssa_vector(¶m->reg, sm6_type_get_scalar_type(dst->type, 0), component_count, dst, sm6); - dst->u.reg = param->reg; + sm6_parser_init_ssa_value(sm6, dst); + sm6_register_from_value(¶m->reg, dst); } static bool instruction_dst_param_init_temp_vector(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) @@ -2576,7 +2624,7 @@ static bool instruction_dst_param_init_temp_vector(struct vkd3d_shader_instructi param->write_mask = VKD3DSP_WRITEMASK_ALL; param->reg.idx[0].offset = 0; param->reg.dimension = VSIR_DIMENSION_VEC4; - dst->u.reg = param->reg; + dst->reg = param->reg; return true; } @@ -2856,11 +2904,9 @@ static size_t sm6_parser_get_value_idx_by_ref(struct sm6_parser *sm6, const stru else { value->type = fwd_type; - value->value_type = VALUE_TYPE_REG; - register_init_with_id(&value->u.reg, VKD3DSPR_SSA, vkd3d_data_type_from_sm6_type( - sm6_type_get_scalar_type(fwd_type, 0)), 
sm6_parser_alloc_ssa_id(sm6)); - value->u.reg.dimension = sm6_type_is_scalar(fwd_type) ? VSIR_DIMENSION_SCALAR - : VSIR_DIMENSION_VEC4; + value->value_type = VALUE_TYPE_SSA; + value->u.ssa.id = sm6_parser_alloc_ssa_id(sm6); + sm6_register_from_value(&value->reg, value); } } @@ -2990,6 +3036,13 @@ static float register_get_float_value(const struct vkd3d_shader_register *reg) return bitcast_uint_to_float(reg->u.immconst_u32[0]); } +static inline float sm6_value_get_constant_float(const struct sm6_value *value) +{ + if (!sm6_value_is_constant(value)) + return UINT_MAX; + return register_get_float_value(&value->reg); +} + static enum vkd3d_result value_allocate_constant_array(struct sm6_value *dst, const struct sm6_type *type, const uint64_t *operands, struct sm6_parser *sm6) { @@ -3066,6 +3119,7 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c struct sm6_value *dst) { const struct sm6_type *elem_type, *pointee_type, *gep_type, *ptr_type; + struct vkd3d_shader_register reg; struct sm6_value *operands[3]; unsigned int i, j, offset; uint64_t value; @@ -3109,7 +3163,9 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c } } - if (operands[0]->u.reg.idx_count > 1) + sm6_register_from_value(®, operands[0]); + + if (reg.idx_count > 1) { WARN("Unsupported stacked GEP.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, @@ -3170,10 +3226,10 @@ static enum vkd3d_result sm6_parser_init_constexpr_gep(struct sm6_parser *sm6, c "Module does not define a pointer type for a constexpr GEP result."); return VKD3D_ERROR_INVALID_SHADER; } - dst->u.reg = operands[0]->u.reg; - dst->u.reg.idx[1].offset = offset; - dst->u.reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; - dst->u.reg.idx_count = 2; + dst->reg = reg; + dst->reg.idx[1].offset = offset; + dst->reg.idx[1].is_in_bounds = record->code == CST_CODE_CE_INBOUNDS_GEP; + dst->reg.idx_count = 2; return VKD3D_OK; } @@ -3232,7 +3288,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const dst->type = type; dst->value_type = VALUE_TYPE_REG; dst->is_back_ref = true; - vsir_register_init(&dst->u.reg, reg_type, reg_data_type, 0); + vsir_register_init(&dst->reg, reg_type, reg_data_type, 0); switch (record->code) { @@ -3257,9 +3313,9 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const value = decode_rotated_signed_value(record->operands[0]); if (type->u.width <= 32) - dst->u.reg.u.immconst_u32[0] = value & ((1ull << type->u.width) - 1); + dst->reg.u.immconst_u32[0] = value & ((1ull << type->u.width) - 1); else - dst->u.reg.u.immconst_u64[0] = value; + dst->reg.u.immconst_u64[0] = value; break; @@ -3274,11 +3330,11 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const } if (type->u.width == 16) - dst->u.reg.u.immconst_u32[0] = record->operands[0]; + dst->reg.u.immconst_u32[0] = record->operands[0]; else if (type->u.width == 32) - dst->u.reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]); + dst->reg.u.immconst_f32[0] = bitcast_uint_to_float(record->operands[0]); else if (type->u.width == 64) - dst->u.reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); + dst->reg.u.immconst_f64[0] = bitcast_uint64_to_double(record->operands[0]); else vkd3d_unreachable(); @@ -3344,13 +3400,13 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const /* Resolve later in case forward refs exist. 
*/ dst->type = type; - dst->u.reg.type = VKD3DSPR_COUNT; - dst->u.reg.idx[0].offset = value; + dst->reg.type = VKD3DSPR_COUNT; + dst->reg.idx[0].offset = value; break; case CST_CODE_UNDEF: dxil_record_validate_operand_max_count(record, 0, sm6); - dst->u.reg.type = VKD3DSPR_UNDEF; + dst->reg.type = VKD3DSPR_UNDEF; /* Mark as explicitly undefined, not the result of a missing constant code or instruction. */ dst->is_undefined = true; break; @@ -3359,7 +3415,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const FIXME("Unhandled constant code %u.\n", record->code); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, "Constant code %u is unhandled.", record->code); - dst->u.reg.type = VKD3DSPR_UNDEF; + dst->reg.type = VKD3DSPR_UNDEF; break; } @@ -3377,12 +3433,12 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const for (i = base_value_idx; i < sm6->value_count; ++i) { dst = &sm6->values[i]; - if (dst->u.reg.type != VKD3DSPR_COUNT) + if (dst->reg.type != VKD3DSPR_COUNT) continue; type = dst->type; - src = &sm6->values[dst->u.reg.idx[0].offset]; + src = &sm6->values[dst->reg.idx[0].offset]; if (!sm6_value_is_numeric_array(src)) { WARN("Value is not an array.\n"); @@ -3393,7 +3449,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const *dst = *src; dst->type = type; - dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); + dst->reg.data_type = vkd3d_data_type_from_sm6_type(type->u.pointer.type); } return VKD3D_OK; @@ -3442,7 +3498,7 @@ static void sm6_parser_declare_icb(struct sm6_parser *sm6, const struct sm6_type ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER); /* The icb value index will be resolved later so forward references can be handled. */ ins->declaration.icb = (void *)(intptr_t)init; - register_init_with_id(&dst->u.reg, VKD3DSPR_IMMCONSTBUFFER, data_type, init); + register_init_with_id(&dst->reg, VKD3DSPR_IMMCONSTBUFFER, data_type, init); } static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const struct sm6_type *elem_type, @@ -3464,7 +3520,7 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru /* The initialiser value index will be resolved later so forward references can be handled. 
*/ ins->declaration.indexable_temp.initialiser = (void *)(uintptr_t)init; - register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); + register_init_with_id(&dst->reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); } static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, @@ -3477,7 +3533,7 @@ static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6 ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); dst_param_init(&ins->declaration.tgsm_raw.reg); register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); - dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + dst->reg = ins->declaration.tgsm_raw.reg.reg; dst->structure_stride = 0; ins->declaration.tgsm_raw.alignment = alignment; byte_count = elem_type->u.width / 8u; @@ -3503,7 +3559,7 @@ static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const str dst_param_init(&ins->declaration.tgsm_structured.reg); register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); - dst->u.reg = ins->declaration.tgsm_structured.reg.reg; + dst->reg = ins->declaration.tgsm_structured.reg.reg; structure_stride = elem_type->u.width / 8u; if (structure_stride != 4) { @@ -3812,11 +3868,11 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) const struct vkd3d_shader_immediate_constant_buffer *icb; struct sm6_value *value = &sm6->values[i]; - if (!sm6_value_is_register(value) || value->u.reg.type != VKD3DSPR_IMMCONSTBUFFER) + if (!sm6_value_is_register(value) || value->reg.type != VKD3DSPR_IMMCONSTBUFFER) continue; - if ((icb = resolve_forward_initialiser(value->u.reg.idx[0].offset, sm6))) - value->u.reg.idx[0].offset = icb->register_idx; + if ((icb = resolve_forward_initialiser(value->reg.idx[0].offset, sm6))) + value->reg.idx[0].offset = icb->register_idx; } return VKD3D_OK; @@ -4027,8 +4083,9 @@ struct function_emission_state unsigned int temp_idx; }; -static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, - unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg); +static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, + const struct vkd3d_shader_register *operand_regs, unsigned int component_count, + struct function_emission_state *state, struct vkd3d_shader_register *reg); static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) @@ -4136,11 +4193,11 @@ static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code) static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record, struct function_emission_state *state, struct sm6_value *dst) { - struct vkd3d_shader_register coord, const_offset, const_zero; - const struct vkd3d_shader_register *regs[2]; struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_register regs[2], reg; struct vkd3d_shader_instruction *ins; + struct vkd3d_shader_register coord; const struct sm6_value *ptr, *src; enum vkd3d_shader_opcode op; unsigned int i = 0; @@ -4152,7 +4209,9 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ || !sm6_value_validate_is_backward_ref(ptr, sm6)) 
return; - if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + sm6_register_from_value(®, ptr); + + if (reg.type != VKD3DSPR_GROUPSHAREDMEM) { WARN("Register is not groupshared.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, @@ -4187,17 +4246,11 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ if (ptr->structure_stride) { - if (ptr->u.reg.idx[1].rel_addr) - { - regs[0] = &ptr->u.reg.idx[1].rel_addr->reg; - } + if (reg.idx[1].rel_addr) + regs[0] = reg.idx[1].rel_addr->reg; else - { - register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset); - regs[0] = &const_offset; - } - register_make_constant_uint(&const_zero, 0); - regs[1] = &const_zero; + register_make_constant_uint(®s[0], reg.idx[1].offset); + register_make_constant_uint(®s[1], 0); if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord)) return; } @@ -4214,18 +4267,18 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ src_param_make_constant_uint(&src_params[0], 0); src_param_init_from_value(&src_params[1], src); + sm6_parser_init_ssa_value(sm6, dst); + dst_params = instruction_dst_params_alloc(ins, 2, sm6); - register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); + sm6_register_from_value(&dst_params[0].reg, dst); dst_param_init(&dst_params[0]); - dst_params[1].reg = ptr->u.reg; + dst_params[1].reg = reg; dst_params[1].reg.data_type = VKD3D_DATA_UNUSED; dst_params[1].reg.idx[1].rel_addr = NULL; dst_params[1].reg.idx[1].offset = ~0u; dst_params[1].reg.idx_count = 1; dst_param_init(&dst_params[1]); - - dst->u.reg = dst_params[0].reg; } static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, @@ -4401,9 +4454,9 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco dst_param_init(&dst_params[0]); dst_param_init(&dst_params[1]); - register_init_ssa_scalar(&dst_params[index].reg, a->type, dst, sm6); + sm6_parser_init_ssa_value(sm6, dst); + sm6_register_from_value(&dst_params[index].reg, dst); vsir_dst_param_init_null(&dst_params[index ^ 1]); - dst->u.reg = dst_params[index].reg; } else { @@ -4464,7 +4517,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record dxil_record_validate_operand_max_count(record, i, sm6); code_block->terminator.type = TERMINATOR_COND_BR; - code_block->terminator.conditional_reg = value->u.reg; + sm6_register_from_value(&code_block->terminator.conditional_reg, value); code_block->terminator.true_block = sm6_function_get_block(function, record->operands[0], sm6); code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); } @@ -4472,8 +4525,9 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record ins->opcode = VKD3DSIH_NOP; } -static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, - unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg) +static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, + const struct vkd3d_shader_register *operand_regs, unsigned int component_count, + struct function_emission_state *state, struct vkd3d_shader_register *reg) { struct vkd3d_shader_instruction *ins = state->ins; struct vkd3d_shader_src_param *src_params; @@ -4483,25 +4537,25 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, cons if (component_count == 1) { - *reg = 
*operand_regs[0]; + *reg = operand_regs[0]; return true; } for (i = 0; i < component_count; ++i) - all_constant &= register_is_constant(operand_regs[i]); + all_constant &= register_is_constant(&operand_regs[i]); if (all_constant) { - vsir_register_init(reg, VKD3DSPR_IMMCONST, operand_regs[0]->data_type, 0); + vsir_register_init(reg, VKD3DSPR_IMMCONST, operand_regs[0].data_type, 0); reg->dimension = VSIR_DIMENSION_VEC4; for (i = 0; i < component_count; ++i) - reg->u.immconst_u32[i] = operand_regs[i]->u.immconst_u32[0]; + reg->u.immconst_u32[i] = operand_regs[i].u.immconst_u32[0]; for (; i < VKD3D_VEC4_SIZE; ++i) reg->u.immconst_u32[i] = 0; return true; } - register_init_with_id(reg, VKD3DSPR_TEMP, operand_regs[0]->data_type, state->temp_idx++); + register_init_with_id(reg, VKD3DSPR_TEMP, operand_regs[0].data_type, state->temp_idx++); reg->dimension = VSIR_DIMENSION_VEC4; for (i = 0; i < component_count; ++i, ++ins) @@ -4512,7 +4566,7 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, cons return false; src_param_init(&src_params[0]); - src_params[0].reg = *operand_regs[i]; + src_params[0].reg = operand_regs[i]; if (!(dst_param = instruction_dst_params_alloc(ins, 1, sm6))) return false; @@ -4530,11 +4584,11 @@ static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, cons static bool sm6_parser_emit_composite_construct(struct sm6_parser *sm6, const struct sm6_value **operands, unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg) { - const struct vkd3d_shader_register *operand_regs[VKD3D_VEC4_SIZE]; + struct vkd3d_shader_register operand_regs[VKD3D_VEC4_SIZE]; unsigned int i; for (i = 0; i < component_count; ++i) - operand_regs[i] = &operands[i]->u.reg; + sm6_register_from_value(&operand_regs[i], operands[i]); return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } @@ -4543,19 +4597,18 @@ static bool sm6_parser_emit_coordinate_construct(struct sm6_parser *sm6, const s unsigned int max_operands, const struct sm6_value *z_operand, struct function_emission_state *state, struct vkd3d_shader_register *reg) { - const struct vkd3d_shader_register *operand_regs[VKD3D_VEC4_SIZE]; + struct vkd3d_shader_register operand_regs[VKD3D_VEC4_SIZE]; unsigned int component_count; for (component_count = 0; component_count < max_operands; ++component_count) { if (!z_operand && operands[component_count]->is_undefined) break; - operand_regs[component_count] = &operands[component_count]->u.reg; + sm6_register_from_value(&operand_regs[component_count], operands[component_count]); } + if (z_operand) - { - operand_regs[component_count++] = &z_operand->u.reg; - } + sm6_register_from_value(&operand_regs[component_count++], z_operand); return sm6_parser_emit_reg_composite_construct(sm6, operand_regs, component_count, state, reg); } @@ -4780,7 +4833,7 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr } else { - reg = operands[coord_idx]->u.reg; + sm6_register_from_value(®, operands[coord_idx]); } for (i = coord_idx + coord_count; i < coord_idx + 3; ++i) @@ -4810,7 +4863,7 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr dst_param_init(&dst_params[1]); sm6_register_from_handle(sm6, &resource->u.handle, &dst_params[1].reg); - dst->u.reg = dst_params[0].reg; + dst->reg = dst_params[0].reg; } static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5957,7 +6010,7 @@ static void 
sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_ dst_param_init(&dst_params[1]); register_init_ssa_scalar(&dst_params[index].reg, dst->type, dst, sm6); vsir_dst_param_init_null(&dst_params[index ^ 1]); - dst->u.reg = dst_params[index].reg; + dst->reg = dst_params[index].reg; } static void sm6_parser_emit_dx_split_double(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -6574,7 +6627,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade return; type = sm6_type_get_scalar_type(dst->type, 0); - vsir_register_init(&dst->u.reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0); + vsir_register_init(&dst->reg, VKD3DSPR_UNDEF, vkd3d_data_type_from_sm6_type(type), 0); /* dst->is_undefined is not set here because it flags only explicitly undefined values. */ } @@ -6680,7 +6733,7 @@ static void sm6_parser_emit_call(struct sm6_parser *sm6, const struct dxil_recor "Expected a constant integer dx intrinsic function id."); return; } - sm6_parser_decode_dx_op(sm6, register_get_uint_value(&op_value->u.reg), + sm6_parser_decode_dx_op(sm6, sm6_value_get_constant_uint(op_value), fn_value->u.function.name, &operands[1], operand_count - 1, state, dst); } @@ -6826,10 +6879,10 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor if (handler_idx == VKD3DSIH_NOP) { - dst->u.reg = value->u.reg; + sm6_register_from_value(&dst->reg, value); /* Set the result type for casts from 16-bit min precision. */ if (type->u.width != 16) - dst->u.reg.data_type = vkd3d_data_type_from_sm6_type(type); + dst->reg.data_type = vkd3d_data_type_from_sm6_type(type); return; } @@ -6841,7 +6894,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor /* bitcast */ if (handler_idx == VKD3DSIH_MOV) - src_param->reg.data_type = dst->u.reg.data_type; + src_param->reg.data_type = dst->reg.data_type; } struct sm6_cmp_info @@ -6994,6 +7047,7 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr, *cmp, *new; + struct vkd3d_shader_register reg; unsigned int i = 0; bool is_volatile; uint64_t code; @@ -7003,7 +7057,9 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re || !sm6_value_validate_is_backward_ref(ptr, sm6)) return; - if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM) + sm6_register_from_value(®, ptr); + + if (reg.type != VKD3DSPR_GROUPSHAREDMEM) { WARN("Register is not groupshared.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, @@ -7063,10 +7119,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re return; register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6); dst_param_init(&dst_params[0]); - dst_params[1].reg = ptr->u.reg; + dst_params[1].reg = reg; dst_param_init(&dst_params[1]); - dst->u.reg = dst_params[0].reg; + dst->reg = dst_params[0].reg; } static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7122,7 +7178,7 @@ static void sm6_parser_emit_extractval(struct sm6_parser *sm6, const struct dxil if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; - src_param->reg = src->u.reg; + sm6_register_from_value(&src_param->reg, src); src_param_init_scalar(src_param, elem_idx); instruction_dst_param_init_ssa_scalar(ins, sm6); @@ -7135,7 +7191,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const 
struct dxil_record unsigned int elem_idx, operand_idx = 2; enum bitcode_address_space addr_space; const struct sm6_value *elem_value; - struct vkd3d_shader_register *reg; + struct vkd3d_shader_register reg; const struct sm6_value *src; bool is_in_bounds; @@ -7149,7 +7205,9 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record return; } - if (src->u.reg.idx_count > 1) + sm6_register_from_value(®, src); + + if (reg.idx_count > 1) { WARN("Unsupported stacked GEP.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, @@ -7218,12 +7276,12 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record return; } - reg = &dst->u.reg; - *reg = src->u.reg; - reg->idx[1].offset = 0; - register_index_address_init(®->idx[1], elem_value, sm6); - reg->idx[1].is_in_bounds = is_in_bounds; - reg->idx_count = 2; + reg.idx[1].offset = 0; + register_index_address_init(®.idx[1], elem_value, sm6); + reg.idx[1].is_in_bounds = is_in_bounds; + reg.idx_count = 2; + + dst->reg = reg; dst->structure_stride = src->structure_stride; ins->opcode = VKD3DSIH_NOP; @@ -7235,6 +7293,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor const struct sm6_type *elem_type = NULL, *pointee_type; unsigned int alignment, operand_count, i = 0; struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_register reg; const struct sm6_value *ptr; uint64_t alignment_code; @@ -7272,25 +7331,29 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); + sm6_register_from_value(®, ptr); + if (ptr->structure_stride) { - VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + VKD3D_ASSERT(reg.type == VKD3DSPR_GROUPSHAREDMEM); vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) return; - if (ptr->u.reg.idx[1].rel_addr) - src_params[0] = *ptr->u.reg.idx[1].rel_addr; + if (reg.idx[1].rel_addr) + src_params[0] = *reg.idx[1].rel_addr; else - src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + src_param_make_constant_uint(&src_params[0], reg.idx[1].offset); /* Struct offset is always zero as there is no struct, just an array. */ src_param_make_constant_uint(&src_params[1], 0); src_param_init_from_value(&src_params[2], ptr); src_params[2].reg.alignment = alignment; + /* The offset is already in src_params[0]. */ + src_params[2].reg.idx_count = 1; } else { - operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + operand_count = 1 + (reg.type == VKD3DSPR_GROUPSHAREDMEM); vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? 
VKD3DSIH_LD_RAW : VKD3DSIH_MOV); if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) @@ -7341,11 +7404,11 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record } dst->type = type; - register_init_ssa_scalar(&dst->u.reg, type, dst, sm6); + register_init_ssa_scalar(&dst->reg, type, dst, sm6); if (!(phi = sm6_block_phi_require_space(code_block, sm6))) return; - phi->reg = dst->u.reg; + sm6_register_from_value(&phi->reg, dst); phi->incoming_count = record->operand_count / 2u; if (!vkd3d_array_reserve((void **)&phi->incoming, &phi->incoming_capacity, phi->incoming_count, @@ -7421,6 +7484,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; const struct sm6_value *ptr, *src; + struct vkd3d_shader_register reg; uint64_t alignment_code; if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) @@ -7455,24 +7519,26 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); + sm6_register_from_value(®, ptr); + if (ptr->structure_stride) { - VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + VKD3D_ASSERT(reg.type == VKD3DSPR_GROUPSHAREDMEM); vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) return; - if (ptr->u.reg.idx[1].rel_addr) - src_params[0] = *ptr->u.reg.idx[1].rel_addr; + if (reg.idx[1].rel_addr) + src_params[0] = *reg.idx[1].rel_addr; else - src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + src_param_make_constant_uint(&src_params[0], reg.idx[1].offset); /* Struct offset is always zero as there is no struct, just an array. */ src_param_make_constant_uint(&src_params[1], 0); src_param_init_from_value(&src_params[2], src); } else { - operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + operand_count = 1 + (reg.type == VKD3DSPR_GROUPSHAREDMEM); vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) @@ -7484,7 +7550,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); - dst_param->reg = ptr->u.reg; + dst_param->reg = reg; dst_param->reg.alignment = alignment; /* Groupshared stores contain the address in the src params. 
*/ if (dst_param->reg.type != VKD3DSPR_IDXTEMP) @@ -7529,7 +7595,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec return; } - terminator->conditional_reg = src->u.reg; + sm6_register_from_value(&terminator->conditional_reg, src); terminator->type = TERMINATOR_SWITCH; terminator->case_count = record->operand_count / 2u; @@ -7645,7 +7711,7 @@ static bool sm6_metadata_get_uint_value(const struct sm6_parser *sm6, if (!sm6_type_is_integer(value->type)) return false; - *u = register_get_uint_value(&value->u.reg); + *u = sm6_value_get_constant_uint(value); return true; } @@ -7664,7 +7730,7 @@ static bool sm6_metadata_get_uint64_value(const struct sm6_parser *sm6, if (!sm6_type_is_integer(value->type)) return false; - *u = register_get_uint64_value(&value->u.reg); + *u = sm6_value_get_constant_uint(value); return true; } @@ -7683,7 +7749,7 @@ static bool sm6_metadata_get_float_value(const struct sm6_parser *sm6, if (!sm6_type_is_floating_point(value->type)) return false; - *f = register_get_float_value(&value->u.reg); + *f = sm6_value_get_constant_float(value); return true; } @@ -7868,7 +7934,7 @@ static void metadata_attachment_record_apply(const struct dxil_record *record, e } else if (metadata_node_get_unary_uint(node, &operand, sm6)) { - dst->u.reg.non_uniform = !!operand; + dst->reg.non_uniform = !!operand; } } else @@ -7940,13 +8006,13 @@ static enum vkd3d_result sm6_function_resolve_phi_incomings(const struct sm6_fun "A PHI incoming value is not a constant or SSA register."); return VKD3D_ERROR_INVALID_SHADER; } - if (src->u.reg.data_type != phi->reg.data_type) + if (src->reg.data_type != phi->reg.data_type) { WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, "The type of a phi incoming value does not match the result type."); } - phi->incoming[j].reg = src->u.reg; + sm6_register_from_value(&phi->incoming[j].reg, src); } } } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index a6b46474812..0f9aafbe13e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -2291,6 +2291,26 @@ struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interl return &interlocked->node; } +static struct hlsl_ir_node *hlsl_new_sync(struct hlsl_ctx *ctx, + uint32_t sync_flags, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_sync *sync; + + if (!(sync = hlsl_alloc(ctx, sizeof(*sync)))) + return NULL; + + init_node(&sync->node, HLSL_IR_SYNC, NULL, loc); + sync->sync_flags = sync_flags; + + return &sync->node; +} + +struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block, + uint32_t sync_flags, const struct vkd3d_shader_location *loc) +{ + return append_new_instr(ctx, block, hlsl_new_sync(ctx, sync_flags, loc)); +} + bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index) { struct hlsl_type *type = index->val.node->data_type; @@ -2681,6 +2701,18 @@ static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx, return &dst->node; } +static struct hlsl_ir_node *clone_sync(struct hlsl_ctx *ctx, struct hlsl_ir_sync *src) +{ + struct hlsl_ir_sync *dst; + + if (!(dst = hlsl_alloc(ctx, sizeof(*dst)))) + return NULL; + init_node(&dst->node, HLSL_IR_SYNC, NULL, &src->node.loc); + dst->sync_flags = src->sync_flags; + + return &dst->node; +} + static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_compile *compile) { @@ -2884,6 +2916,9 @@ 
static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_INTERLOCKED: return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr)); + case HLSL_IR_SYNC: + return clone_sync(ctx, hlsl_ir_sync(instr)); + case HLSL_IR_COMPILE: return clone_compile(ctx, map, hlsl_ir_compile(instr)); @@ -3341,7 +3376,9 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_STORE ] = "HLSL_IR_STORE", [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + [HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED", + [HLSL_IR_SYNC ] = "HLSL_IR_SYNC", [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", @@ -3831,6 +3868,19 @@ static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct vkd3d_string_buffer_printf(buffer, ")"); } +static void dump_ir_sync(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_sync *sync) +{ + vkd3d_string_buffer_printf(buffer, "sync"); + if (sync->sync_flags & VKD3DSSF_GLOBAL_UAV) + vkd3d_string_buffer_printf(buffer, "_uglobal"); + if (sync->sync_flags & VKD3DSSF_THREAD_GROUP_UAV) + vkd3d_string_buffer_printf(buffer, "_ugroup"); + if (sync->sync_flags & VKD3DSSF_GROUP_SHARED_MEMORY) + vkd3d_string_buffer_printf(buffer, "_g"); + if (sync->sync_flags & VKD3DSSF_THREAD_GROUP) + vkd3d_string_buffer_printf(buffer, "_t"); +} + static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_compile *compile) { @@ -3968,6 +4018,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr)); break; + case HLSL_IR_SYNC: + dump_ir_sync(buffer, hlsl_ir_sync(instr)); + break; + case HLSL_IR_COMPILE: dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr)); break; @@ -4205,6 +4259,11 @@ static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked) vkd3d_free(interlocked); } +static void free_ir_sync(struct hlsl_ir_sync *sync) +{ + vkd3d_free(sync); +} + static void free_ir_compile(struct hlsl_ir_compile *compile) { unsigned int i; @@ -4295,6 +4354,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_interlocked(hlsl_ir_interlocked(node)); break; + case HLSL_IR_SYNC: + free_ir_sync(hlsl_ir_sync(node)); + break; + case HLSL_IR_COMPILE: free_ir_compile(hlsl_ir_compile(node)); break; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 8cb805a2e66..9eb86534f81 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -329,7 +329,9 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, + HLSL_IR_INTERLOCKED, + HLSL_IR_SYNC, HLSL_IR_COMPILE, HLSL_IR_SAMPLER_STATE, @@ -1006,6 +1008,15 @@ struct hlsl_ir_interlocked struct hlsl_src coords, cmp_value, value; }; +/* Represents a thread synchronization instruction such as GroupMemoryBarrier().*/ +struct hlsl_ir_sync +{ + struct hlsl_ir_node node; + + /* Flags from enum vkd3d_shader_sync_flags. */ + uint32_t sync_flags; +}; + struct hlsl_scope { /* Item entry for hlsl_ctx.scopes. 
*/ @@ -1343,6 +1354,12 @@ static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node); } +static inline struct hlsl_ir_sync *hlsl_ir_sync(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_SYNC); + return CONTAINING_RECORD(node, struct hlsl_ir_sync, node); +} + static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_COMPILE); @@ -1582,6 +1599,8 @@ void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, unsigned int writemask, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_block_add_sync(struct hlsl_ctx *ctx, struct hlsl_block *block, + uint32_t sync_flags, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_unary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 702fd30bda3..05657d27b38 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -574,13 +574,14 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx /* fall-through */ case HLSL_IR_CALL: case HLSL_IR_IF: + case HLSL_IR_INTERLOCKED: case HLSL_IR_LOOP: case HLSL_IR_JUMP: case HLSL_IR_RESOURCE_LOAD: case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: - case HLSL_IR_INTERLOCKED: case HLSL_IR_STATEBLOCK_CONSTANT: + case HLSL_IR_SYNC: hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected literal expression."); break; @@ -5110,6 +5111,67 @@ static bool intrinsic_InterlockedXor(struct hlsl_ctx *ctx, return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_XOR, params, loc, "InterlockedXor"); } +static void validate_group_barrier_profile(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +{ + if (ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE || hlsl_version_lt(ctx, 5, 0)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "Group barriers can only be used in compute shaders 5.0 or higher."); + } +} + +static bool intrinsic_AllMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV + | VKD3DSSF_GROUP_SHARED_MEMORY, loc); +} + +static bool intrinsic_AllMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV + | VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); +} + +static bool intrinsic_DeviceMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + if ((ctx->profile->type != VKD3D_SHADER_TYPE_COMPUTE && ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL) + || hlsl_version_lt(ctx, 5, 0)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "DeviceMemoryBarrier() can only be used 
in pixel and compute shaders 5.0 or higher."); + } + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV, loc); +} + +static bool intrinsic_DeviceMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, VKD3DSSF_GLOBAL_UAV + | VKD3DSSF_THREAD_GROUP, loc); +} + +static bool intrinsic_GroupMemoryBarrier(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, + VKD3DSSF_GROUP_SHARED_MEMORY, loc); +} + +static bool intrinsic_GroupMemoryBarrierWithGroupSync(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + validate_group_barrier_profile(ctx, loc); + return !!hlsl_block_add_sync(ctx, params->instrs, + VKD3DSSF_GROUP_SHARED_MEMORY | VKD3DSSF_THREAD_GROUP, loc); +} + static const struct intrinsic_function { const char *name; @@ -5121,8 +5183,14 @@ static const struct intrinsic_function intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. */ + {"AllMemoryBarrier", 0, true, intrinsic_AllMemoryBarrier}, + {"AllMemoryBarrierWithGroupSync", 0, true, intrinsic_AllMemoryBarrierWithGroupSync}, {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"DeviceMemoryBarrier", 0, true, intrinsic_DeviceMemoryBarrier}, + {"DeviceMemoryBarrierWithGroupSync", 0, true, intrinsic_DeviceMemoryBarrierWithGroupSync}, {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, + {"GroupMemoryBarrier", 0, true, intrinsic_GroupMemoryBarrier}, + {"GroupMemoryBarrierWithGroupSync", 0, true, intrinsic_GroupMemoryBarrierWithGroupSync}, {"InterlockedAdd", -1, true, intrinsic_InterlockedAdd}, {"InterlockedAnd", -1, true, intrinsic_InterlockedAnd}, {"InterlockedCompareExchange", 4, true, intrinsic_InterlockedCompareExchange}, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index bc14885af2b..9c3affda534 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -288,7 +288,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, - uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc) + uint32_t index, bool output, bool force_align, bool create, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; struct hlsl_ir_var *ext_var; @@ -311,6 +311,11 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir || ext_var->data_type->class <= HLSL_CLASS_VECTOR); VKD3D_ASSERT(hlsl_type_is_primitive_array(type) || type->class <= HLSL_CLASS_VECTOR); + vkd3d_free(new_name); + + if (!create) + return ext_var; + if (output) { if (index >= semantic->reported_duplicated_output_next_index) @@ -336,11 +341,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir } } - vkd3d_free(new_name); return ext_var; } } + VKD3D_ASSERT(create); + if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) { vkd3d_free(new_name); @@ -429,7 +435,7 @@ static void 
prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK; if (!(input = add_semantic_var(ctx, func, var, prim_type_src, - modifiers, semantic, semantic_index + i, false, force_align, loc))) + modifiers, semantic, semantic_index + i, false, force_align, true, loc))) return; hlsl_init_simple_deref_from_var(&prim_deref, input); @@ -442,7 +448,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec else { if (!(input = add_semantic_var(ctx, func, var, vector_type_src, - modifiers, semantic, semantic_index + i, false, force_align, loc))) + modifiers, semantic, semantic_index + i, false, force_align, true, loc))) return; if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) @@ -550,9 +556,9 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function list_move_head(&func->body.instrs, &block.instrs); } -static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, - struct hlsl_ir_load *rhs, uint32_t modifiers, - struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) +static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, + struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create) { struct hlsl_type *type = rhs->node.data_type, *vector_type; struct vkd3d_shader_location *loc = &rhs->node.loc; @@ -582,49 +588,48 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec struct hlsl_ir_node *load; if (!(output = add_semantic_var(ctx, func, var, vector_type, - modifiers, semantic, semantic_index + i, true, force_align, loc))) + modifiers, semantic, semantic_index + i, true, force_align, create, loc))) return; if (type->class == HLSL_CLASS_MATRIX) { - c = hlsl_block_add_uint_constant(ctx, &func->body, i, &var->loc); - load = hlsl_block_add_load_index(ctx, &func->body, &rhs->src, c, &var->loc); + c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc); + load = hlsl_block_add_load_index(ctx, block, &rhs->src, c, &var->loc); } else { VKD3D_ASSERT(i == 0); - load = hlsl_block_add_load_index(ctx, &func->body, &rhs->src, NULL, &var->loc); + load = hlsl_block_add_load_index(ctx, block, &rhs->src, NULL, &var->loc); } - hlsl_block_add_simple_store(ctx, &func->body, output, load); + hlsl_block_add_simple_store(ctx, block, output, load); } } -static void append_output_copy_recurse(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, - struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) +static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_function_decl *func, const struct hlsl_type *type, struct hlsl_ir_load *rhs, uint32_t modifiers, + struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create) { struct vkd3d_shader_location *loc = &rhs->node.loc; - struct hlsl_type *type = rhs->node.data_type; struct hlsl_ir_var *var = rhs->src.var; struct hlsl_ir_node *c; unsigned int i; if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT) { - struct hlsl_ir_load *element_load; - struct hlsl_struct_field *field; - uint32_t elem_semantic_index; - for (i = 0; i < hlsl_type_element_count(type); ++i) { - uint32_t element_modifiers; + uint32_t element_modifiers, elem_semantic_index; + const struct 
hlsl_type *element_type; + struct hlsl_ir_load *element_load; + struct hlsl_struct_field *field; if (type->class == HLSL_CLASS_ARRAY) { elem_semantic_index = semantic_index + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; + element_type = type->e.array.type; element_modifiers = modifiers; force_align = true; } @@ -637,23 +642,24 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, semantic = &field->semantic; elem_semantic_index = semantic->index; loc = &field->loc; + element_type = field->type; element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); force_align = (i == 0); } - c = hlsl_block_add_uint_constant(ctx, &func->body, i, &var->loc); + c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc); if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) return; - hlsl_block_add_instr(&func->body, &element_load->node); + hlsl_block_add_instr(block, &element_load->node); - append_output_copy_recurse(ctx, func, element_load, element_modifiers, - semantic, elem_semantic_index, force_align); + append_output_copy_recurse(ctx, block, func, element_type, element_load, element_modifiers, semantic, + elem_semantic_index, force_align, create); } } else { - append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align); + append_output_copy(ctx, block, func, rhs, modifiers, semantic, semantic_index, force_align, create); } } @@ -669,7 +675,8 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function return; hlsl_block_add_instr(&func->body, &load->node); - append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); + append_output_copy_recurse(ctx, &func->body, func, var->data_type, load, var->storage_modifiers, + &var->semantic, var->semantic.index, false, true); } bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), @@ -2453,6 +2460,7 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b case HLSL_IR_INTERLOCKED: progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state); + break; default: break; @@ -2894,6 +2902,16 @@ static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *b ++state->count; } +static void mark_store_groups_dirty(struct hlsl_ctx *ctx, + struct vectorize_stores_state *state, struct hlsl_ir_var *var) +{ + for (unsigned int i = 0; i < state->count; ++i) + { + if (state->groups[i].stores[0]->lhs.var == var) + state->groups[i].dirty = true; + } +} + static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vectorize_stores_state *state) { @@ -2907,20 +2925,21 @@ static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_blo } else if (instr->type == HLSL_IR_LOAD) { - struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var; - /* By vectorizing store A with store B, we are effectively moving * store A down to happen at the same time as store B. * If there was a load of the same variable between the two, this * would be incorrect. * Therefore invalidate all stores to this variable. As above, we * could be more granular if necessary. 
*/ - - for (unsigned int i = 0; i < state->count; ++i) - { - if (state->groups[i].stores[0]->lhs.var == var) - state->groups[i].dirty = true; - } + mark_store_groups_dirty(ctx, state, hlsl_ir_load(instr)->src.var); + } + else if (instr->type == HLSL_IR_INTERLOCKED) + { + /* An interlocked operation can be used on shared memory variables, + * and it is at the same time both a store and a load, thus, we + * should also mark all stores to this variable as dirty once we + * find one.*/ + mark_store_groups_dirty(ctx, state, hlsl_ir_interlocked(instr)->dst.var); } else if (instr->type == HLSL_IR_IF) { @@ -3338,6 +3357,59 @@ static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; } +struct stream_append_ctx +{ + struct hlsl_ir_function_decl *func; + bool created; +}; + +static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct stream_append_ctx *append_ctx = context; + struct hlsl_ir_resource_store *store; + const struct hlsl_ir_node *rhs; + const struct hlsl_type *type; + struct hlsl_ir_var *var; + struct hlsl_block block; + + if (instr->type != HLSL_IR_RESOURCE_STORE) + return false; + + store = hlsl_ir_resource_store(instr); + if (store->store_type != HLSL_RESOURCE_STREAM_APPEND) + return false; + + rhs = store->value.node; + var = store->resource.var; + type = hlsl_get_stream_output_type(var->data_type); + + if (rhs->type != HLSL_IR_LOAD) + { + hlsl_fixme(ctx, &instr->loc, "Stream append rhs is not HLSL_IR_LOAD. Broadcast may be missing."); + return false; + } + + VKD3D_ASSERT(var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated); + + if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index) + { + hlsl_fixme(ctx, &instr->loc, "Append to an output stream with a nonzero stream index."); + return false; + } + + hlsl_block_init(&block); + + append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs), var->storage_modifiers, + &var->semantic, var->semantic.index, false, !append_ctx->created); + append_ctx->created = true; + + list_move_before(&instr->entry, &block.instrs); + hlsl_src_remove(&store->value); + + return true; + +} + static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { const struct hlsl_ir_node *rhs; @@ -5127,11 +5199,12 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_CALL: case HLSL_IR_IF: + case HLSL_IR_INTERLOCKED: case HLSL_IR_JUMP: case HLSL_IR_LOOP: case HLSL_IR_RESOURCE_STORE: case HLSL_IR_SWITCH: - case HLSL_IR_INTERLOCKED: + case HLSL_IR_SYNC: break; case HLSL_IR_STATEBLOCK_CONSTANT: /* Stateblock constants should not appear in the shader program. 
*/ @@ -5415,6 +5488,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop } case HLSL_IR_CONSTANT: case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SYNC: break; case HLSL_IR_COMPILE: case HLSL_IR_SAMPLER_STATE: @@ -6441,7 +6515,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } } -static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint32_t *output_reg_count) { struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0}; struct register_allocator input_allocator = {0}, output_allocator = {0}; @@ -6478,6 +6553,8 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader); } + *output_reg_count = output_allocator.reg_count; + vkd3d_free(in_prim_allocator.allocations); vkd3d_free(patch_constant_out_patch_allocator.allocations); vkd3d_free(input_allocator.allocations); @@ -7641,6 +7718,42 @@ static void validate_and_record_stream_outputs(struct hlsl_ctx *ctx) /* TODO: check that maxvertexcount * outputdatasize <= 1024. */ } +static void validate_max_output_size(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint32_t output_reg_count) +{ + unsigned int max_output_size, comp_count = 0; + unsigned int *reg_comp_count; + struct hlsl_ir_var *var; + uint32_t id; + + if (ctx->result) + return; + + if (!(reg_comp_count = hlsl_calloc(ctx, output_reg_count, sizeof(*reg_comp_count)))) + return; + + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (!var->is_output_semantic) + continue; + + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + id = var->regs[HLSL_REGSET_NUMERIC].id; + reg_comp_count[id] = max(reg_comp_count[id], vkd3d_log2i(var->regs[HLSL_REGSET_NUMERIC].writemask) + 1); + } + + for (id = 0; id < output_reg_count; ++id) + comp_count += reg_comp_count[id]; + + max_output_size = ctx->max_vertex_count * comp_count; + if (max_output_size > 1024) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT, + "Max vertex count (%u) * output data component count (%u) = %u, which is greater than 1024.", + ctx->max_vertex_count, comp_count, max_output_size); + + vkd3d_free(reg_comp_count); +} + static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) { struct hlsl_ir_node *instr, *next; @@ -10718,8 +10831,20 @@ static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, if (store->store_type != HLSL_RESOURCE_STORE) { - hlsl_fixme(ctx, &instr->loc, "Stream output operations."); - return false; + enum vkd3d_shader_opcode opcode = store->store_type == HLSL_RESOURCE_STREAM_APPEND + ? 
VKD3DSIH_EMIT : VKD3DSIH_CUT; + + VKD3D_ASSERT(!store->value.node && !store->coords.node); + VKD3D_ASSERT(store->resource.var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated); + + if (store->resource.var->regs[HLSL_REGSET_STREAM_OUTPUTS].index) + { + hlsl_fixme(ctx, &instr->loc, "Stream output operation with a nonzero stream index."); + return false; + } + + ins = generate_vsir_add_program_instruction(ctx, program, &store->node.loc, opcode, 0, 0); + return !!ins; } if (!store->resource.var->is_uniform) @@ -11264,6 +11389,19 @@ static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, } } +static bool sm4_generate_vsir_instr_sync(struct hlsl_ctx *ctx, + struct vsir_program *program, const struct hlsl_ir_sync *sync) +{ + const struct hlsl_ir_node *instr = &sync->node; + struct vkd3d_shader_instruction *ins; + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SYNC, 0, 0))) + return false; + ins->flags = sync->sync_flags; + + return true; +} + static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) @@ -11414,6 +11552,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr)); break; + case HLSL_IR_SYNC: + sm4_generate_vsir_instr_sync(ctx, program, hlsl_ir_sync(instr)); + break; + default: break; } @@ -13236,6 +13378,8 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct hlsl_block static_initializers, global_uniforms; struct hlsl_block *const body = &entry_func->body; struct recursive_call_ctx recursive_call_ctx; + struct stream_append_ctx stream_append_ctx; + uint32_t output_reg_count; struct hlsl_ir_var *var; unsigned int i; bool progress; @@ -13461,6 +13605,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, { allocate_stream_outputs(ctx); validate_and_record_stream_outputs(ctx); + + memset(&stream_append_ctx, 0, sizeof(stream_append_ctx)); + stream_append_ctx.func = entry_func; + hlsl_transform_ir(ctx, lower_stream_appends, body, &stream_append_ctx); } if (profile->major_version < 4) @@ -13519,7 +13667,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, allocate_register_reservations(ctx, &ctx->extern_vars); allocate_register_reservations(ctx, &entry_func->extern_vars); - allocate_semantic_registers(ctx, entry_func); + allocate_semantic_registers(ctx, entry_func, &output_reg_count); + + if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY) + validate_max_output_size(ctx, entry_func, output_reg_count); } int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index c29bedfaaa9..29b03871e05 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -4165,6 +4165,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_BREAK: case VKD3DSIH_CASE: case VKD3DSIH_CONTINUE: + case VKD3DSIH_CUT: case VKD3DSIH_DEFAULT: case VKD3DSIH_DISCARD: case VKD3DSIH_DIV: @@ -4178,6 +4179,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_DSY_COARSE: case VKD3DSIH_DSY_FINE: case VKD3DSIH_ELSE: + case VKD3DSIH_EMIT: case VKD3DSIH_ENDIF: case VKD3DSIH_ENDLOOP: case VKD3DSIH_ENDSWITCH: @@ -4213,6 +4215,7 @@ static void 
tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VKD3DSIH_IMM_ATOMIC_UMIN: case VKD3DSIH_IMM_ATOMIC_OR: case VKD3DSIH_IMM_ATOMIC_XOR: + case VKD3DSIH_SYNC: case VKD3DSIH_IMUL: case VKD3DSIH_INE: case VKD3DSIH_INEG: -- 2.47.2
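
Note on the dxil.c hunks above: most of them implement a single refactoring. struct sm6_value previously kept its vkd3d_shader_register inside the union "u", so a value could not carry SSA bookkeeping and a materialized register at the same time; the patch hoists "reg" out of the union, adds VALUE_TYPE_SSA with an sm6_ssa_data holding only the id, and derives registers on demand through sm6_register_from_value(). What follows is a minimal, compilable editorial sketch of that pattern under invented, simplified types (the *_demo names are not vkd3d identifiers, and the real sm6_register_from_value() also derives the register's data type and dimension):

/* Editorial sketch only, not vkd3d code: simplified stand-ins for
 * struct sm6_value and struct vkd3d_shader_register, showing the
 * tagged-union-with-derived-register pattern used by the patch. */
#include <stdio.h>

enum value_type_demo { VALUE_TYPE_REG, VALUE_TYPE_SSA };

struct reg_demo
{
    enum { REG_IMMCONST, REG_SSA } kind;
    unsigned int id;
};

struct value_demo
{
    enum value_type_demo value_type;
    union
    {
        unsigned int ssa_id; /* Payload for VALUE_TYPE_SSA. */
    } u;
    /* As in the patch, the materialized register lives outside the
     * union, so it can be cached for any value_type without
     * clobbering the union payload. */
    struct reg_demo reg;
};

/* Mirrors the shape of sm6_register_from_value(): derive a register
 * from the value's type tag instead of reading a union member. */
static void register_from_value_demo(struct reg_demo *reg, const struct value_demo *v)
{
    switch (v->value_type)
    {
        case VALUE_TYPE_REG:
            *reg = v->reg;
            break;

        case VALUE_TYPE_SSA:
            reg->kind = REG_SSA;
            reg->id = v->u.ssa_id;
            break;
    }
}

int main(void)
{
    struct value_demo v = {.value_type = VALUE_TYPE_SSA, .u.ssa_id = 7};
    struct reg_demo r;

    register_from_value_demo(&r, &v);
    printf("ssa register id: %u\n", r.id);
    return 0;
}

Keeping the cached register outside the union is what lets hunks such as the atomicrmw and cmpxchg emitters assign dst->reg = dst_params[0].reg unconditionally: the write no longer aliases union payloads like sm6_handle_data, which a VALUE_TYPE_HANDLE value still stores in u.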