wine-staging/patches/vkd3d-latest/0005-Updated-vkd3d-to-abf76372e0a44dd3920e3fb796d75e920d0.patch

From 03778823649d7632c5c98c04728517d2b2ca3ec8 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Fri, 15 Mar 2024 16:37:26 +1100
Subject: [PATCH] Updated vkd3d to abf76372e0a44dd3920e3fb796d75e920d0c07bb.

---
libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 8 +-
libs/vkd3d/libs/vkd3d-shader/dxil.c | 229 +++++++-
libs/vkd3d/libs/vkd3d-shader/glsl.c | 18 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 10 +
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 +
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 309 +++++++++--
libs/vkd3d/libs/vkd3d-shader/ir.c | 494 ++++++++++++++++--
libs/vkd3d/libs/vkd3d-shader/spirv.c | 20 +-
libs/vkd3d/libs/vkd3d-shader/tpf.c | 4 +-
.../libs/vkd3d-shader/vkd3d_shader_main.c | 12 +-
.../libs/vkd3d-shader/vkd3d_shader_private.h | 17 +-
11 files changed, 989 insertions(+), 135 deletions(-)

diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index 57dd0258aef..9f153a1da04 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -1358,7 +1358,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi
sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i);
if (!sm1->p.failed)
- ret = vsir_validate(&sm1->p);
+ ret = vkd3d_shader_parser_validate(&sm1->p);
if (sm1->p.failed && ret >= 0)
ret = VKD3D_ERROR_INVALID_SHADER;
@@ -2334,10 +2334,14 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b
break;
case HLSL_OP2_SLT:
+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+ hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders.");
write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg);
break;
case HLSL_OP3_CMP:
+ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
+ hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders.");
write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
break;
@@ -2496,7 +2500,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX)
{
- FIXME("Matrix writemasks need to be lowered.\n");
+ hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n");
return;
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index de51588b513..86671c07d70 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -393,6 +393,7 @@ enum dx_intrinsic_opcode
DX_TEXTURE_GATHER_CMP = 74,
DX_ATOMIC_BINOP = 78,
DX_ATOMIC_CMP_XCHG = 79,
+ DX_BARRIER = 80,
DX_DERIV_COARSEX = 83,
DX_DERIV_COARSEY = 84,
DX_DERIV_FINEX = 85,
@@ -465,6 +466,14 @@ enum dxil_atomic_binop_code
ATOMIC_BINOP_INVALID,
};
+enum dxil_sync_flags
+{
+ SYNC_THREAD_GROUP = 0x1,
+ SYNC_GLOBAL_UAV = 0x2,
+ SYNC_THREAD_GROUP_UAV = 0x4,
+ SYNC_GROUP_SHARED_MEMORY = 0x8,
+};
+
struct sm6_pointer_info
{
const struct sm6_type *type;
@@ -543,6 +552,7 @@ struct sm6_value
{
const struct sm6_type *type;
enum sm6_value_type value_type;
+ unsigned int structure_stride;
bool is_undefined;
union
{
@@ -755,6 +765,7 @@ struct sm6_parser
unsigned int indexable_temp_count;
unsigned int icb_count;
+ unsigned int tgsm_count;
struct sm6_value *values;
size_t value_count;
@@ -2267,6 +2278,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st
register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6);
}
+static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value)
+{
+ vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
+ reg->u.immconst_u32[0] = value;
+}
+
static void dst_param_init(struct vkd3d_shader_dst_param *param)
{
param->write_mask = VKD3DSP_WRITEMASK_0;
@@ -2330,6 +2347,12 @@ static void src_param_init_vector_from_reg(struct vkd3d_shader_src_param *param,
param->reg = *reg;
}
+static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value)
+{
+ src_param_init(param);
+ register_make_constant_uint(&param->reg, value);
+}
+
static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address,
struct sm6_parser *sm6)
{
@@ -3009,6 +3032,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru
register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx);
}
+static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type,
+ unsigned int alignment, unsigned int init, struct sm6_value *dst)
+{
+ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type);
+ struct vkd3d_shader_instruction *ins;
+ unsigned int byte_count;
+
+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW);
+ dst_param_init(&ins->declaration.tgsm_raw.reg);
+ register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++);
+ dst->u.reg = ins->declaration.tgsm_raw.reg.reg;
+ dst->structure_stride = 0;
+ ins->declaration.tgsm_raw.alignment = alignment;
+ byte_count = elem_type->u.width / 8u;
+ if (byte_count != 4)
+ {
+ FIXME("Unsupported byte count %u.\n", byte_count);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Raw TGSM byte count %u is not supported.", byte_count);
+ }
+ ins->declaration.tgsm_raw.byte_count = byte_count;
+ /* The initialiser value index will be resolved later when forward references can be handled. */
+ ins->flags = init;
+}
+
+static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type,
+ unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst)
+{
+ enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type);
+ struct vkd3d_shader_instruction *ins;
+ unsigned int structure_stride;
+
+ ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED);
+ dst_param_init(&ins->declaration.tgsm_structured.reg);
+ register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM,
+ data_type, sm6->tgsm_count++);
+ dst->u.reg = ins->declaration.tgsm_structured.reg.reg;
+ structure_stride = elem_type->u.width / 8u;
+ if (structure_stride != 4)
+ {
+ FIXME("Unsupported structure stride %u.\n", structure_stride);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Structured TGSM byte stride %u is not supported.", structure_stride);
+ }
+ dst->structure_stride = structure_stride;
+ ins->declaration.tgsm_structured.alignment = alignment;
+ ins->declaration.tgsm_structured.byte_stride = structure_stride;
+ ins->declaration.tgsm_structured.structure_count = count;
+ /* The initialiser value index will be resolved later when forward references can be handled. */
+ ins->flags = init;
+}
+
static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record)
{
const struct sm6_type *type, *scalar_type;
@@ -3134,10 +3209,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_
}
else if (address_space == ADDRESS_SPACE_GROUPSHARED)
{
- FIXME("Unsupported TGSM.\n");
- vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
- "TGSM global variables are not supported.");
- return false;
+ if (!sm6_type_is_numeric(scalar_type))
+ {
+ WARN("Unsupported type class %u.\n", scalar_type->class);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "TGSM variables of type class %u are not supported.", scalar_type->class);
+ return false;
+ }
+ if (count == 1)
+ sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst);
+ else
+ sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst);
}
else
{
@@ -3173,6 +3255,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init
return NULL;
}
+static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6)
+{
+ const struct sm6_value *value;
+
+ if (!index)
+ return false;
+
+ --index;
+ if (!(value = sm6_parser_get_value_safe(sm6, index))
+ || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value)))
+ {
+ WARN("Invalid initialiser index %zu.\n", index);
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "TGSM initialiser value index %zu is invalid.", index);
+ return false;
+ }
+ else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value))
+ {
+ return true;
+ }
+ else if (sm6_value_is_undef(value))
+ {
+ /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */
+ return false;
+ }
+
+ FIXME("Non-zero initialisers are not supported.\n");
+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
+ "Non-zero TGSM initialisers are not supported.");
+ return false;
+}
+
static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6)
{
size_t i, count, base_value_idx = sm6->value_count;
@@ -3246,6 +3360,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6)
{
ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6);
}
+ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW)
+ {
+ ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6);
+ ins->flags = 0;
+ }
+ else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED)
+ {
+ ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6);
+ ins->flags = 0;
+ }
}
for (i = base_value_idx; i < sm6->value_count; ++i)
{
@@ -3989,6 +4113,27 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr
dst->u.reg = dst_params[0].reg;
}
+static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
+ const struct sm6_value **operands, struct function_emission_state *state)
+{
+ struct vkd3d_shader_instruction *ins = state->ins;
+ enum dxil_sync_flags flags;
+
+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC);
+ flags = sm6_value_get_constant_uint(operands[0]);
+ ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV);
+ if (flags & SYNC_GLOBAL_UAV)
+ ins->flags |= VKD3DSSF_GLOBAL_UAV;
+ if (flags & SYNC_GROUP_SHARED_MEMORY)
+ ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY;
+ if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY))
+ {
+ FIXME("Unhandled flags %#x.\n", flags);
+ vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS,
+ "Barrier flags %#x are unhandled.", flags);
+ }
+}
+
static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op,
const struct sm6_value **operands, struct function_emission_state *state)
{
@@ -4818,6 +4963,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] =
[DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary},
[DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop},
[DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop},
+ [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier},
[DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary},
[DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load},
[DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store},
@@ -5541,6 +5687,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record
register_index_address_init(&reg->idx[1], elem_value, sm6);
reg->idx[1].is_in_bounds = is_in_bounds;
reg->idx_count = 2;
+ dst->structure_stride = src->structure_stride;
ins->handler_idx = VKD3DSIH_NOP;
}
@@ -5549,8 +5696,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor
struct vkd3d_shader_instruction *ins, struct sm6_value *dst)
{
const struct sm6_type *elem_type = NULL, *pointee_type;
- struct vkd3d_shader_src_param *src_param;
- unsigned int alignment, i = 0;
+ unsigned int alignment, operand_count, i = 0;
+ struct vkd3d_shader_src_param *src_params;
const struct sm6_value *ptr;
uint64_t alignment_code;
@@ -5587,12 +5734,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor
if (record->operands[i])
WARN("Ignoring volatile modifier.\n");
- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV);
+ if (ptr->structure_stride)
+ {
+ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED);
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6)))
- return;
- src_param_init_from_value(&src_param[0], ptr);
- src_param->reg.alignment = alignment;
+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6)))
+ return;
+ if (ptr->u.reg.idx[1].rel_addr)
+ src_params[0] = *ptr->u.reg.idx[1].rel_addr;
+ else
+ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset);
+ /* Struct offset is always zero as there is no struct, just an array. */
+ src_param_make_constant_uint(&src_params[1], 0);
+ src_param_init_from_value(&src_params[2], ptr);
+ src_params[2].reg.alignment = alignment;
+ }
+ else
+ {
+ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV);
+
+ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6)))
+ return;
+ if (operand_count > 1)
+ src_param_make_constant_uint(&src_params[0], 0);
+ src_param_init_from_value(&src_params[operand_count - 1], ptr);
+ src_params[operand_count - 1].reg.alignment = alignment;
+ }
instruction_dst_param_init_ssa_scalar(ins, sm6);
}
@@ -5710,11 +5879,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record
static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record,
struct vkd3d_shader_instruction *ins, struct sm6_value *dst)
{
- struct vkd3d_shader_src_param *src_param;
+ unsigned int i = 0, alignment, operand_count;
+ struct vkd3d_shader_src_param *src_params;
struct vkd3d_shader_dst_param *dst_param;
const struct sm6_type *pointee_type;
const struct sm6_value *ptr, *src;
- unsigned int i = 0, alignment;
uint64_t alignment_code;
if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))
@@ -5747,16 +5916,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco
if (record->operands[i])
WARN("Ignoring volatile modifier.\n");
- vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV);
+ if (ptr->structure_stride)
+ {
+ assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED);
- if (!(src_param = instruction_src_params_alloc(ins, 1, sm6)))
- return;
- src_param_init_from_value(&src_param[0], src);
+ if (!(src_params = instruction_src_params_alloc(ins, 3, sm6)))
+ return;
+ if (ptr->u.reg.idx[1].rel_addr)
+ src_params[0] = *ptr->u.reg.idx[1].rel_addr;
+ else
+ src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset);
+ /* Struct offset is always zero as there is no struct, just an array. */
+ src_param_make_constant_uint(&src_params[1], 0);
+ src_param_init_from_value(&src_params[2], src);
+ }
+ else
+ {
+ operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM);
+ vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV);
+
+ if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6)))
+ return;
+ if (operand_count > 1)
+ src_param_make_constant_uint(&src_params[0], 0);
+ src_param_init_from_value(&src_params[operand_count - 1], src);
+ }
dst_param = instruction_dst_params_alloc(ins, 1, sm6);
dst_param_init(dst_param);
dst_param->reg = ptr->u.reg;
dst_param->reg.alignment = alignment;
+ /* Groupshared stores contain the address in the src params. */
+ if (dst_param->reg.type != VKD3DSPR_IDXTEMP)
+ dst_param->reg.idx_count = 1;
}
static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record,
@@ -8481,7 +8674,7 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi
vkd3d_free(byte_code);
if (!sm6->p.failed && ret >= 0)
- ret = vsir_validate(&sm6->p);
+ ret = vkd3d_shader_parser_validate(&sm6->p);
if (sm6->p.failed && ret >= 0)
ret = VKD3D_ERROR_INVALID_SHADER;
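
For reference, the flag translation performed by sm6_parser_emit_dx_barrier() above reduces to the standalone sketch below. The dxil_sync_flags values are the ones declared in this patch; the VKD3DSSF_* values and the translate_barrier_flags() helper name are illustrative placeholders, not the real vkd3d definitions.

#include <stdio.h>

/* DXIL barrier semantic flags, as declared in this patch. */
enum dxil_sync_flags
{
    SYNC_THREAD_GROUP        = 0x1,
    SYNC_GLOBAL_UAV          = 0x2,
    SYNC_THREAD_GROUP_UAV    = 0x4,
    SYNC_GROUP_SHARED_MEMORY = 0x8,
};

/* Placeholder values; the real VKD3DSSF_* flags live in vkd3d_shader_private.h. */
enum { VKD3DSSF_GLOBAL_UAV = 0x20, VKD3DSSF_GROUP_SHARED_MEMORY = 0x40 };

static unsigned int translate_barrier_flags(unsigned int flags)
{
    /* SYNC_THREAD_GROUP and SYNC_THREAD_GROUP_UAV pass through unchanged. */
    unsigned int vsir_flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV);

    if (flags & SYNC_GLOBAL_UAV)
        vsir_flags |= VKD3DSSF_GLOBAL_UAV;
    if (flags & SYNC_GROUP_SHARED_MEMORY)
        vsir_flags |= VKD3DSSF_GROUP_SHARED_MEMORY;
    /* Any remaining bits are reported and dropped, as in the patch. */
    if ((flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV
            | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)))
        fprintf(stderr, "Unhandled barrier flags %#x.\n", flags);
    return vsir_flags;
}

int main(void)
{
    /* E.g. group sync plus UAV and TGSM visibility. */
    printf("%#x\n", translate_barrier_flags(SYNC_THREAD_GROUP
            | SYNC_GLOBAL_UAV | SYNC_GROUP_SHARED_MEMORY));
    return 0;
}
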
diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
index da90782c814..23ea89c47be 100644
--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
@@ -39,6 +39,13 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error(
generator->failed = true;
}
+static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
+{
+ vkd3d_string_buffer_printf(&gen->buffer, "/* <unhandled instruction %#x> */\n", ins->handler_idx);
+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+ "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx);
+}
+
static void shader_glsl_ret(struct vkd3d_glsl_generator *generator,
const struct vkd3d_shader_instruction *ins)
{
@@ -69,9 +76,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator
shader_glsl_ret(generator, instruction);
break;
default:
- vkd3d_glsl_compiler_error(generator,
- VKD3D_SHADER_ERROR_GLSL_INTERNAL,
- "Unhandled instruction %#x", instruction->handler_idx);
+ shader_glsl_unhandled(generator, instruction);
break;
}
}
@@ -92,11 +97,14 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator,
vkd3d_glsl_handle_instruction(generator, &instructions->elements[i]);
}
+ vkd3d_string_buffer_printf(&generator->buffer, "}\n");
+
+ if (TRACE_ON())
+ vkd3d_string_buffer_trace(&generator->buffer);
+
if (generator->failed)
return VKD3D_ERROR_INVALID_SHADER;
- vkd3d_string_buffer_printf(&generator->buffer, "}\n");
-
if ((code = vkd3d_malloc(generator->buffer.buffer_size)))
{
memcpy(code, generator->buffer.buffer, generator->buffer.content_size);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 2820b9abf67..a82334e58fd 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -1348,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp
return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
}
+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3)
+{
+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3};
+
+ assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type));
+ assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type));
+ return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc);
+}
+
struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition,
struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc)
{
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index 681f2edce31..5ced5edc766 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -594,6 +594,7 @@ enum hlsl_ir_expr_op
HLSL_OP2_MUL,
HLSL_OP2_NEQUAL,
HLSL_OP2_RSHIFT,
+ /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. */
HLSL_OP2_SLT,
/* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy,
@@ -1247,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond
struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx,
enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op,
+ struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3);
void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 9eb65dc0170..ff349ab49ef 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -2902,7 +2902,7 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
return true;
}
-/* Use movc/cmp/slt for the ternary operator. */
+/* Use movc/cmp for the ternary operator. */
static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement;
@@ -2928,7 +2928,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
return false;
}
- if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+ if (ctx->profile->major_version < 4)
{
struct hlsl_ir_node *abs, *neg;
@@ -2946,51 +2946,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc)))
return false;
}
- else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
- {
- struct hlsl_ir_node *neg, *slt, *sum, *cond2, *slt_cast, *mul;
-
- /* Expression used here is "slt(<cond>) * (first - second) + second". */
-
- if (ctx->profile->major_version == 3)
- {
- if (!(cond2 = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond, &instr->loc)))
- return false;
- }
- else
- {
- if (!(cond2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, cond, cond)))
- return false;
- }
- hlsl_block_add_instr(block, cond2);
-
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, cond2, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, neg);
-
- if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, neg, cond2)))
- return false;
- hlsl_block_add_instr(block, slt);
-
- if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, second, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, neg);
-
- if (!(sum = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, first, neg)))
- return false;
- hlsl_block_add_instr(block, sum);
-
- if (!(slt_cast = hlsl_new_cast(ctx, slt, sum->data_type, &instr->loc)))
- return false;
- hlsl_block_add_instr(block, slt_cast);
-
- if (!(mul = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, slt_cast, sum)))
- return false;
- hlsl_block_add_instr(block, mul);
-
- if (!(replacement = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul, second)))
- return false;
- }
else
{
if (cond->data_type->base_type == HLSL_TYPE_FLOAT)
@@ -3020,6 +2975,261 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru
return true;
}
+static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
+ struct hlsl_block *block)
+{
+ struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret;
+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
+ struct hlsl_type *float_type;
+ struct hlsl_ir_expr *expr;
+ bool negate = false;
+
+ if (instr->type != HLSL_IR_EXPR)
+ return false;
+ expr = hlsl_ir_expr(instr);
+ if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS
+ && expr->op != HLSL_OP2_GEQUAL)
+ return false;
+
+ arg1 = expr->operands[0].node;
+ arg2 = expr->operands[1].node;
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, arg1_cast);
+
+ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, arg2_cast);
+
+ switch (expr->op)
+ {
+ case HLSL_OP2_EQUAL:
+ case HLSL_OP2_NEQUAL:
+ {
+ struct hlsl_ir_node *neg, *sub, *abs, *abs_neg;
+
+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg);
+
+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg)))
+ return false;
+ hlsl_block_add_instr(block, sub);
+
+ if (ctx->profile->major_version >= 3)
+ {
+ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, abs);
+ }
+ else
+ {
+ /* Use MUL as a precarious ABS. */
+ if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub)))
+ return false;
+ hlsl_block_add_instr(block, abs);
+ }
+
+ if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, abs_neg);
+
+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs)))
+ return false;
+ hlsl_block_add_instr(block, slt);
+
+ negate = (expr->op == HLSL_OP2_EQUAL);
+ break;
+ }
+
+ case HLSL_OP2_GEQUAL:
+ case HLSL_OP2_LESS:
+ {
+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast)))
+ return false;
+ hlsl_block_add_instr(block, slt);
+
+ negate = (expr->op == HLSL_OP2_GEQUAL);
+ break;
+ }
+
+ default:
+ vkd3d_unreachable();
+ }
+
+ if (negate)
+ {
+ struct hlsl_constant_value one_value;
+ struct hlsl_ir_node *one, *slt_neg;
+
+ one_value.u[0].f = 1.0;
+ one_value.u[1].f = 1.0;
+ one_value.u[2].f = 1.0;
+ one_value.u[3].f = 1.0;
+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, one);
+
+ if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, slt_neg);
+
+ if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg)))
+ return false;
+ hlsl_block_add_instr(block, res);
+ }
+ else
+ {
+ res = slt;
+ }
+
+ /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT,
+ * and casts to BOOL have already been lowered to "!= 0". */
+ memset(operands, 0, sizeof(operands));
+ operands[0] = res;
+ if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, ret);
+
+ return true;
+}
+
+/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to
+ * CMP instructions (only available in pixel shaders).
+ * Based on the following equivalence:
+ * SLT(x, y)
+ * = (x < y) ? 1.0 : 0.0
+ * = ((x - y) >= 0) ? 0.0 : 1.0
+ * = CMP(x - y, 0.0, 1.0)
+ */
+static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
+{
+ struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp;
+ struct hlsl_constant_value zero_value, one_value;
+ struct hlsl_type *float_type;
+ struct hlsl_ir_expr *expr;
+
+ if (instr->type != HLSL_IR_EXPR)
+ return false;
+ expr = hlsl_ir_expr(instr);
+ if (expr->op != HLSL_OP2_SLT)
+ return false;
+
+ arg1 = expr->operands[0].node;
+ arg2 = expr->operands[1].node;
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+ if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, arg1_cast);
+
+ if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, arg2_cast);
+
+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg);
+
+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg)))
+ return false;
+ hlsl_block_add_instr(block, sub);
+
+ memset(&zero_value, 0, sizeof(zero_value));
+ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, zero);
+
+ one_value.u[0].f = 1.0;
+ one_value.u[1].f = 1.0;
+ one_value.u[2].f = 1.0;
+ one_value.u[3].f = 1.0;
+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, one);
+
+ if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one)))
+ return false;
+ hlsl_block_add_instr(block, cmp);
+
+ return true;
+}
+
+/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to
+ * SLT instructions (only available in vertex shaders).
+ * Based on the following equivalence:
+ * CMP(x, y, z)
+ * = (x >= 0) ? y : z
+ * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0)
+ * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0))
+ */
+static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
+{
+ struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add;
+ struct hlsl_constant_value zero_value, one_value;
+ struct hlsl_type *float_type;
+ struct hlsl_ir_expr *expr;
+ unsigned int i;
+
+ if (instr->type != HLSL_IR_EXPR)
+ return false;
+ expr = hlsl_ir_expr(instr);
+ if (expr->op != HLSL_OP3_CMP)
+ return false;
+
+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx);
+
+ for (i = 0; i < 3; ++i)
+ {
+ args[i] = expr->operands[i].node;
+
+ if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, args_cast[i]);
+ }
+
+ memset(&zero_value, 0, sizeof(zero_value));
+ if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, zero);
+
+ one_value.u[0].f = 1.0;
+ one_value.u[1].f = 1.0;
+ one_value.u[2].f = 1.0;
+ one_value.u[3].f = 1.0;
+ if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, one);
+
+ if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero)))
+ return false;
+ hlsl_block_add_instr(block, slt);
+
+ if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt)))
+ return false;
+ hlsl_block_add_instr(block, mul1);
+
+ if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc)))
+ return false;
+ hlsl_block_add_instr(block, neg_slt);
+
+ if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt)))
+ return false;
+ hlsl_block_add_instr(block, sub);
+
+ if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub)))
+ return false;
+ hlsl_block_add_instr(block, mul2);
+
+ if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2)))
+ return false;
+ hlsl_block_add_instr(block, add);
+
+ return true;
+}
+
static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
{
struct hlsl_type *type = instr->data_type, *arg_type;
@@ -5209,6 +5419,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_ir(ctx, lower_round, body);
lower_ir(ctx, lower_ceil, body);
lower_ir(ctx, lower_floor, body);
+ lower_ir(ctx, lower_comparison_operators, body);
+ if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
+ lower_ir(ctx, lower_slt, body);
+ else
+ lower_ir(ctx, lower_cmp, body);
}
if (profile->major_version < 2)
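
As a sanity check on the two equivalences used by lower_slt() and lower_cmp() above, here is a self-contained C sketch of the scalar semantics. The helper names are illustrative rather than vkd3d functions, finite non-NaN inputs are assumed, and the real passes additionally operate per component and insert float casts.

#include <assert.h>
#include <stdio.h>

/* Reference semantics of the two SM1-SM3 instructions involved. */
static float slt(float x, float y) { return x < y ? 1.0f : 0.0f; }
static float cmp(float x, float y, float z) { return x >= 0.0f ? y : z; }

/* lower_slt(): SLT(x, y) = CMP(x - y, 0.0, 1.0). */
static float slt_via_cmp(float x, float y) { return cmp(x - y, 0.0f, 1.0f); }

/* lower_cmp(): CMP(x, y, z) = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)). */
static float cmp_via_slt(float x, float y, float z)
{
    float s = slt(x, 0.0f);
    return z * s + y * (1.0f - s);
}

int main(void)
{
    static const float v[] = {-2.0f, -0.5f, 0.0f, 0.5f, 2.0f};
    unsigned int i, j;

    for (i = 0; i < 5; ++i)
    {
        for (j = 0; j < 5; ++j)
        {
            assert(slt_via_cmp(v[i], v[j]) == slt(v[i], v[j]));
            assert(cmp_via_slt(v[i], v[j], v[j] + 1.0f) == cmp(v[i], v[j], v[j] + 1.0f));
        }
    }
    printf("equivalences hold on the sample grid\n");
    return 0;
}
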
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 0dd31af9192..55d1216460f 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -1386,10 +1386,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi
}
}
-static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser)
+static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program)
{
- struct io_normaliser normaliser = {parser->program.instructions};
- struct vsir_program *program = &parser->program;
+ struct io_normaliser normaliser = {program->instructions};
struct vkd3d_shader_instruction *ins;
bool has_control_point_phase;
unsigned int i, j;
@@ -1671,19 +1670,20 @@ static void remove_dead_code(struct vsir_program *program)
}
}
-static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser)
+static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program,
+ struct vkd3d_shader_message_context *message_context)
{
unsigned int i;
- for (i = 0; i < parser->program.instructions.count; ++i)
+ for (i = 0; i < program->instructions.count; ++i)
{
- struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i];
+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
struct vkd3d_shader_src_param *srcs;
switch (ins->handler_idx)
{
case VKD3DSIH_TEX:
- if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3)))
+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3)))
return VKD3D_ERROR_OUT_OF_MEMORY;
memset(srcs, 0, sizeof(*srcs) * 3);
@@ -1726,7 +1726,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser
case VKD3DSIH_TEXREG2AR:
case VKD3DSIH_TEXREG2GB:
case VKD3DSIH_TEXREG2RGB:
- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
+ vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: "
"Combined sampler instruction %#x.", ins->handler_idx);
return VKD3D_ERROR_NOT_IMPLEMENTED;
@@ -1792,10 +1792,10 @@ struct cf_flattener_info
struct cf_flattener
{
- struct vkd3d_shader_parser *parser;
+ struct vsir_program *program;
struct vkd3d_shader_location location;
- bool allocation_failed;
+ enum vkd3d_result status;
struct vkd3d_shader_instruction *instructions;
size_t instruction_capacity;
@@ -1815,13 +1815,20 @@ struct cf_flattener
size_t control_flow_info_size;
};
+static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error)
+{
+ if (flattener->status != VKD3D_OK)
+ return;
+ flattener->status = error;
+}
+
static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count)
{
if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity,
flattener->instruction_count + count, sizeof(*flattener->instructions)))
{
ERR("Failed to allocate instructions.\n");
- flattener->allocation_failed = true;
+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
return NULL;
}
return &flattener->instructions[flattener->instruction_count];
@@ -1853,9 +1860,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_
{
struct vkd3d_shader_src_param *params;
- if (!(params = vsir_program_get_src_params(&flattener->parser->program, count)))
+ if (!(params = vsir_program_get_src_params(flattener->program, count)))
{
- flattener->allocation_failed = true;
+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
return NULL;
}
ins->src = params;
@@ -1869,10 +1876,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int
if (!(ins = cf_flattener_require_space(flattener, 1)))
return;
- if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program))
+ if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program))
++flattener->instruction_count;
else
- flattener->allocation_failed = true;
+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
}
/* For conditional branches, this returns the false target branch parameter. */
@@ -1950,7 +1957,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_
flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info)))
{
ERR("Failed to allocate control flow info structure.\n");
- flattener->allocation_failed = true;
+ cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
return NULL;
}
@@ -2017,12 +2024,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla
flattener->block_names[block_id] = buffer.buffer;
}
-static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener)
+static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener,
+ struct vkd3d_shader_message_context *message_context)
{
bool main_block_open, is_hull_shader, after_declarations_section;
- struct vkd3d_shader_parser *parser = flattener->parser;
struct vkd3d_shader_instruction_array *instructions;
- struct vsir_program *program = &parser->program;
+ struct vsir_program *program = flattener->program;
struct vkd3d_shader_instruction *dst_ins;
size_t i;
@@ -2074,7 +2081,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
break;
case VKD3DSIH_LABEL:
- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
+ vkd3d_shader_error(message_context, &instruction->location,
+ VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: Label instruction.");
return VKD3D_ERROR_NOT_IMPLEMENTED;
@@ -2239,8 +2247,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X))
{
WARN("Unexpected src swizzle %#x.\n", src->swizzle);
- vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE,
+ vkd3d_shader_error(message_context, &instruction->location,
+ VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE,
"The swizzle for a switch case value is not scalar X.");
+ cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER);
}
value = *src->reg.u.immconst_u32;
@@ -2368,21 +2378,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
++flattener->instruction_count;
}
- return flattener->allocation_failed ? VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK;
+ return flattener->status;
}
-static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser)
+static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program,
+ struct vkd3d_shader_message_context *message_context)
{
- struct vsir_program *program = &parser->program;
- struct cf_flattener flattener = {0};
+ struct cf_flattener flattener = {.program = program};
enum vkd3d_result result;
- flattener.parser = parser;
- result = cf_flattener_iterate_instruction_array(&flattener);
-
- if (result >= 0)
+ if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0)
{
- vkd3d_free(parser->program.instructions.elements);
+ vkd3d_free(program->instructions.elements);
program->instructions.elements = flattener.instructions;
program->instructions.capacity = flattener.instruction_capacity;
program->instructions.count = flattener.instruction_count;
@@ -3142,6 +3149,93 @@ static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_blo
return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare);
}
+struct vsir_cfg_structure_list
+{
+ struct vsir_cfg_structure *structures;
+ size_t count, capacity;
+ unsigned int end;
+};
+
+struct vsir_cfg_structure
+{
+ enum vsir_cfg_structure_type
+ {
+ /* Execute a block of the original VSIR program. */
+ STRUCTURE_TYPE_BLOCK,
+ /* Execute a loop, which is identified by an index. */
+ STRUCTURE_TYPE_LOOP,
+ /* Execute a `return' or a (possibly) multilevel `break' or
+ * `continue', targeting a loop by its index. If `condition'
+ * is non-NULL, then the jump is conditional (this is
+ * currently not allowed for `return'). */
+ STRUCTURE_TYPE_JUMP,
+ } type;
+ union
+ {
+ struct vsir_block *block;
+ struct
+ {
+ struct vsir_cfg_structure_list body;
+ unsigned idx;
+ } loop;
+ struct
+ {
+ enum vsir_cfg_jump_type
+ {
+ /* NONE is available as an intermediate value, but it
+ * is not allowed in valid structured programs. */
+ JUMP_NONE,
+ JUMP_BREAK,
+ JUMP_CONTINUE,
+ JUMP_RET,
+ } type;
+ unsigned int target;
+ struct vkd3d_shader_src_param *condition;
+ bool invert_condition;
+ } jump;
+ } u;
+};
+
+static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type);
+static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure);
+
+static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list)
+{
+ unsigned int i;
+
+ for (i = 0; i < list->count; ++i)
+ vsir_cfg_structure_cleanup(&list->structures[i]);
+ vkd3d_free(list->structures);
+}
+
+static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list,
+ enum vsir_cfg_structure_type type)
+{
+ struct vsir_cfg_structure *ret;
+
+ if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1,
+ sizeof(*list->structures)))
+ return NULL;
+
+ ret = &list->structures[list->count++];
+
+ vsir_cfg_structure_init(ret, type);
+
+ return ret;
+}
+
+static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type)
+{
+ memset(structure, 0, sizeof(*structure));
+ structure->type = type;
+}
+
+static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure)
+{
+ if (structure->type == STRUCTURE_TYPE_LOOP)
+ vsir_cfg_structure_list_cleanup(&structure->u.loop.body);
+}
+
struct vsir_cfg
{
struct vkd3d_shader_message_context *message_context;
@@ -3187,6 +3281,8 @@ struct vsir_cfg
bool synthetic;
} *loop_intervals;
size_t loop_interval_count, loop_interval_capacity;
+
+ struct vsir_cfg_structure_list structured_program;
};
static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
@@ -3201,6 +3297,8 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
vsir_block_list_cleanup(&cfg->order);
+ vsir_cfg_structure_list_cleanup(&cfg->structured_program);
+
vkd3d_free(cfg->blocks);
vkd3d_free(cfg->loops);
vkd3d_free(cfg->loops_by_header);
@@ -3288,6 +3386,76 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg)
TRACE("}\n");
}
+static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list);
+
+static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure)
+{
+ switch (structure->type)
+ {
+ case STRUCTURE_TYPE_BLOCK:
+ TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label);
+ break;
+
+ case STRUCTURE_TYPE_LOOP:
+ TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx);
+
+ vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body);
+
+ TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx);
+ break;
+
+ case STRUCTURE_TYPE_JUMP:
+ {
+ const char *type_str;
+
+ switch (structure->u.jump.type)
+ {
+ case JUMP_RET:
+ TRACE("%sret\n", cfg->debug_buffer.buffer);
+ return;
+
+ case JUMP_BREAK:
+ type_str = "break";
+ break;
+
+ case JUMP_CONTINUE:
+ type_str = "continue";
+ break;
+
+ default:
+ vkd3d_unreachable();
+ }
+
+ TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str,
+ structure->u.jump.condition ? "c" : "", structure->u.jump.target);
+ break;
+ }
+
+ default:
+ vkd3d_unreachable();
+ }
+}
+
+static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
+{
+ unsigned int i;
+
+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " ");
+
+ for (i = 0; i < list->count; ++i)
+ vsir_cfg_structure_dump(cfg, &list->structures[i]);
+
+ vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2);
+}
+
+static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg)
+{
+ unsigned int i;
+
+ for (i = 0; i < cfg->structured_program.count; ++i)
+ vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]);
+}
+
static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program,
struct vkd3d_shader_message_context *message_context)
{
@@ -3916,6 +4084,217 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_
return VKD3D_OK;
}
+struct vsir_cfg_edge_action
+{
+ enum vsir_cfg_jump_type jump_type;
+ unsigned int target;
+ struct vsir_block *successor;
+};
+
+static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block,
+ struct vsir_block *successor, struct vsir_cfg_edge_action *action)
+{
+ unsigned int i;
+
+ action->target = UINT_MAX;
+ action->successor = successor;
+
+ if (successor->order_pos <= block->order_pos)
+ {
+ /* The successor is before the current block, so we have to
+ * use `continue'. The target loop is the innermost that
+ * contains the current block and has the successor as
+ * `continue' target. */
+ for (i = 0; i < cfg->loop_interval_count; ++i)
+ {
+ struct cfg_loop_interval *interval = &cfg->loop_intervals[i];
+
+ if (interval->begin == successor->order_pos && block->order_pos < interval->end)
+ action->target = i;
+
+ if (interval->begin > successor->order_pos)
+ break;
+ }
+
+ assert(action->target != UINT_MAX);
+ action->jump_type = JUMP_CONTINUE;
+ }
+ else
+ {
+ /* The successor is after the current block, so we have to use
+ * `break', or possibly just jump to the following block. The
+ * target loop is the outermost that contains the current
+ * block and has the successor as `break' target. */
+ for (i = 0; i < cfg->loop_interval_count; ++i)
+ {
+ struct cfg_loop_interval *interval = &cfg->loop_intervals[i];
+
+ if (interval->begin <= block->order_pos && interval->end == successor->order_pos)
+ {
+ action->target = i;
+ break;
+ }
+ }
+
+ if (action->target == UINT_MAX)
+ {
+ assert(successor->order_pos == block->order_pos + 1);
+ action->jump_type = JUMP_NONE;
+ }
+ else
+ {
+ action->jump_type = JUMP_BREAK;
+ }
+ }
+}
+
+static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg)
+{
+ unsigned int i, stack_depth = 1, open_interval_idx = 0;
+ struct vsir_cfg_structure_list **stack = NULL;
+
+ /* It's enough to allocate up to the maximum interval stacking
+ * depth (plus one for the full program), but this is simpler. */
+ if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack))))
+ goto fail;
+ cfg->structured_program.end = cfg->order.count;
+ stack[0] = &cfg->structured_program;
+
+ for (i = 0; i < cfg->order.count; ++i)
+ {
+ struct vsir_block *block = cfg->order.blocks[i];
+ struct vsir_cfg_structure *structure;
+
+ assert(stack_depth > 0);
+
+ /* Open loop intervals. */
+ while (open_interval_idx < cfg->loop_interval_count)
+ {
+ struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx];
+
+ if (interval->begin != i)
+ break;
+
+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP)))
+ goto fail;
+ structure->u.loop.idx = open_interval_idx++;
+
+ structure->u.loop.body.end = interval->end;
+ stack[stack_depth++] = &structure->u.loop.body;
+ }
+
+ /* Execute the block. */
+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK)))
+ goto fail;
+ structure->u.block = block;
+
+ /* Generate between zero and two jump instructions. */
+ switch (block->end->handler_idx)
+ {
+ case VKD3DSIH_BRANCH:
+ {
+ struct vsir_cfg_edge_action action_true, action_false;
+ bool invert_condition = false;
+
+ if (vsir_register_is_label(&block->end->src[0].reg))
+ {
+ unsigned int target = label_from_src_param(&block->end->src[0]);
+ struct vsir_block *successor = &cfg->blocks[target - 1];
+
+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true);
+ action_false = action_true;
+ }
+ else
+ {
+ unsigned int target = label_from_src_param(&block->end->src[1]);
+ struct vsir_block *successor = &cfg->blocks[target - 1];
+
+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_true);
+
+ target = label_from_src_param(&block->end->src[2]);
+ successor = &cfg->blocks[target - 1];
+
+ vsir_cfg_compute_edge_action(cfg, block, successor, &action_false);
+ }
+
+ /* This will happen if the branch is unconditional,
+ * but also if it's conditional with the same target
+ * in both branches, which can happen in some corner
+ * cases, e.g. when converting switch instructions to
+ * selection ladders. */
+ if (action_true.successor == action_false.successor)
+ {
+ assert(action_true.jump_type == action_false.jump_type);
+ }
+ else
+ {
+ /* At most one branch can just fall through to the
+ * next block, in which case we make sure it's the
+ * false branch. */
+ if (action_true.jump_type == JUMP_NONE)
+ {
+ struct vsir_cfg_edge_action tmp = action_true;
+ action_true = action_false;
+ action_false = tmp;
+ invert_condition = true;
+ }
+
+ assert(action_true.jump_type != JUMP_NONE);
+
+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP)))
+ goto fail;
+ structure->u.jump.type = action_true.jump_type;
+ structure->u.jump.target = action_true.target;
+ structure->u.jump.condition = &block->end->src[0];
+ structure->u.jump.invert_condition = invert_condition;
+ }
+
+ if (action_false.jump_type != JUMP_NONE)
+ {
+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP)))
+ goto fail;
+ structure->u.jump.type = action_false.jump_type;
+ structure->u.jump.target = action_false.target;
+ }
+ break;
+ }
+
+ case VKD3DSIH_RET:
+ if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP)))
+ goto fail;
+ structure->u.jump.type = JUMP_RET;
+ break;
+
+ default:
+ vkd3d_unreachable();
+ }
+
+ /* Close loop intervals. */
+ while (stack_depth > 0)
+ {
+ if (stack[stack_depth - 1]->end != i + 1)
+ break;
+
+ --stack_depth;
+ }
+ }
+
+ assert(stack_depth == 0);
+ assert(open_interval_idx == cfg->loop_interval_count);
+
+ if (TRACE_ON())
+ vsir_cfg_dump_structured_program(cfg);
+
+ vkd3d_free(stack);
+
+ return VKD3D_OK;
+
+fail:
+ vkd3d_free(stack);
+
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+}
+
enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
const struct vkd3d_shader_compile_info *compile_info)
{
@@ -3961,6 +4340,12 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
return result;
}
+ if ((result = vsir_cfg_build_structured_program(&cfg)) < 0)
+ {
+ vsir_cfg_cleanup(&cfg);
+ return result;
+ }
+
if ((result = vsir_program_structurise(program)) < 0)
{
vsir_cfg_cleanup(&cfg);
@@ -3987,7 +4372,7 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
return result;
}
- if ((result = shader_normalise_io_registers(parser)) < 0)
+ if ((result = vsir_program_normalise_io_registers(program)) < 0)
return result;
if ((result = instruction_array_normalise_flat_constants(program)) < 0)
@@ -3995,31 +4380,31 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
remove_dead_code(program);
- if ((result = normalise_combined_samplers(parser)) < 0)
+ if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
return result;
}
- if ((result = flatten_control_flow_constructs(parser)) < 0)
+ if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
return result;
if (TRACE_ON())
vkd3d_shader_trace(program);
- if (!parser->failed && (result = vsir_validate(parser)) < 0)
+ if ((result = vsir_program_validate(program, parser->config_flags,
+ compile_info->source_name, message_context)) < 0)
return result;
- if (parser->failed)
- result = VKD3D_ERROR_INVALID_SHADER;
-
return result;
}
struct validation_context
{
- struct vkd3d_shader_parser *parser;
+ struct vkd3d_shader_message_context *message_context;
const struct vsir_program *program;
size_t instruction_idx;
+ struct vkd3d_shader_location null_location;
bool invalid_instruction_idx;
+ enum vkd3d_result status;
bool dcl_temps_found;
enum vkd3d_shader_opcode phase;
enum cf_type
@@ -4065,16 +4450,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c
if (ctx->invalid_instruction_idx)
{
- vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer);
+ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer);
ERR("VSIR validation error: %s\n", buf.buffer);
}
else
{
- vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer);
+ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx];
+ vkd3d_shader_error(ctx->message_context, &ins->location, error,
+ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer);
ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer);
}
vkd3d_string_buffer_cleanup(&buf);
+
+ if (!ctx->status)
+ ctx->status = VKD3D_ERROR_INVALID_SHADER;
}
static void vsir_validate_src_param(struct validation_context *ctx,
@@ -4128,10 +4518,10 @@ static void vsir_validate_register(struct validation_context *ctx,
if (reg->idx[0].rel_addr)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register.");
- if (reg->idx[0].offset >= ctx->parser->program.temp_count)
+ if (reg->idx[0].offset >= ctx->program->temp_count)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.",
- reg->idx[0].offset, ctx->parser->program.temp_count);
+ reg->idx[0].offset, ctx->program->temp_count);
break;
}
@@ -4321,7 +4711,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx,
switch (dst->reg.type)
{
case VKD3DSPR_SSA:
- if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count)
+ if (dst->reg.idx[0].offset < ctx->program->ssa_count)
{
struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset];
@@ -4374,7 +4764,7 @@ static void vsir_validate_src_param(struct validation_context *ctx,
switch (src->reg.type)
{
case VKD3DSPR_SSA:
- if (src->reg.idx[0].offset < ctx->parser->program.ssa_count)
+ if (src->reg.idx[0].offset < ctx->program->ssa_count)
{
struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset];
unsigned int i;
@@ -4465,7 +4855,6 @@ static void vsir_validate_instruction(struct validation_context *ctx)
size_t i;
instruction = &ctx->program->instructions.elements[ctx->instruction_idx];
- ctx->parser->location = instruction->location;
for (i = 0; i < instruction->dst_count; ++i)
vsir_validate_dst_param(ctx, &instruction->dst[i]);
@@ -4816,17 +5205,20 @@ static void vsir_validate_instruction(struct validation_context *ctx)
}
}
-enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser)
+enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags,
+ const char *source_name, struct vkd3d_shader_message_context *message_context)
{
struct validation_context ctx =
{
- .parser = parser,
- .program = &parser->program,
+ .message_context = message_context,
+ .program = program,
+ .null_location = {.source_name = source_name},
+ .status = VKD3D_OK,
.phase = VKD3DSIH_INVALID,
};
unsigned int i;
- if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION))
+ if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION))
return VKD3D_OK;
if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps))))
@@ -4835,7 +5227,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser)
if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas))))
goto fail;
- for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx)
+ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx)
vsir_validate_instruction(&ctx);
ctx.invalid_instruction_idx = true;
@@ -4860,7 +5252,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser)
vkd3d_free(ctx.temps);
vkd3d_free(ctx.ssas);
- return VKD3D_OK;
+ return ctx.status;
fail:
vkd3d_free(ctx.blocks);
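
The open/close bookkeeping in vsir_cfg_build_structured_program() earlier in this file's diff can be seen in isolation in the sketch below. The interval data is made up, and the real function also emits block and jump structures between the opens and closes; only the stack walk over sorted loop intervals is shown.

#include <stdio.h>

struct interval { unsigned int begin, end; };

int main(void)
{
    /* Two nested loops over a five-block ordering: made-up example data. */
    static const struct interval intervals[] = {{0, 5}, {1, 3}};
    const unsigned int interval_count = 2, block_count = 5;
    const struct interval *stack[4];
    unsigned int i, open_idx = 0;
    int depth = 0;

    for (i = 0; i < block_count; ++i)
    {
        /* Open every interval whose begin position is the current block. */
        while (open_idx < interval_count && intervals[open_idx].begin == i)
        {
            printf("%*s%u : loop {\n", depth * 2, "", open_idx);
            stack[depth++] = &intervals[open_idx++];
        }
        printf("%*sblock %u\n", depth * 2, "", i);
        /* Close intervals, innermost first, that end right after this block. */
        while (depth > 0 && stack[depth - 1]->end == i + 1)
            printf("%*s}\n", --depth * 2, "");
    }
    return 0;
}
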
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index 1518afa93be..b4f34c42124 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -6435,20 +6435,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp
}
static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler,
- const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride)
+ const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size,
+ unsigned int structure_stride, bool zero_init)
{
- uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id;
+ uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id;
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
const SpvStorageClass storage_class = SpvStorageClassWorkgroup;
struct vkd3d_symbol reg_symbol;
+ /* Alignment is supported only in the Kernel execution model. */
+ if (alignment)
+ TRACE("Ignoring alignment %u.\n", alignment);
+
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
length_id = spirv_compiler_get_constant_uint(compiler, size);
array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id);
pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id);
+ init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0;
var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream,
- pointer_type_id, storage_class, 0);
+ pointer_type_id, storage_class, init_id);
spirv_compiler_emit_register_debug_name(builder, var_id, reg);
@@ -6463,8 +6469,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler,
const struct vkd3d_shader_instruction *instruction)
{
const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw;
- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg,
- tgsm_raw->byte_count / 4, 0);
+ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment,
+ tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init);
}
static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler,
@@ -6472,8 +6478,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi
{
const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured;
unsigned int stride = tgsm_structured->byte_stride / 4;
- spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg,
- tgsm_structured->structure_count * stride, stride);
+ spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment,
+ tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init);
}
static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler,
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index bd558693b07..cb4f6d4ddbf 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u
ins->declaration.tgsm_raw.byte_count = *tokens;
if (ins->declaration.tgsm_raw.byte_count % 4)
FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count);
+ ins->declaration.tgsm_raw.zero_init = false;
}
static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
@@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction
ins->declaration.tgsm_structured.structure_count = *tokens;
if (ins->declaration.tgsm_structured.byte_stride % 4)
FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride);
+ ins->declaration.tgsm_structured.zero_init = false;
}
static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode,
@@ -2724,7 +2726,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi
shader_sm4_validate_default_phase_index_ranges(sm4);
if (!sm4->p.failed)
- vsir_validate(&sm4->p);
+ vkd3d_shader_parser_validate(&sm4->p);
if (sm4->p.failed)
{
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
index 1ac372f163c..81ac84896d4 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
@@ -73,8 +73,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer)
void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer)
{
- buffer->buffer[0] = '\0';
- buffer->content_size = 0;
+ vkd3d_string_buffer_truncate(buffer, 0);
+}
+
+void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size)
+{
+ if (size < buffer->content_size)
+ {
+ buffer->buffer[size] = '\0';
+ buffer->content_size = size;
+ }
}
static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc)
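
A minimal model of the truncate semantics introduced above: shrink-only, so clearing becomes a truncate to zero and an oversized request is a no-op. The struct buf and buf_truncate() names are toy stand-ins; the real struct vkd3d_string_buffer also tracks buffer_size and reallocates.

#include <stdio.h>
#include <string.h>

struct buf { char data[64]; size_t content_size; };

static void buf_truncate(struct buf *b, size_t size)
{
    /* Truncation only ever shrinks; size >= content_size leaves the buffer alone. */
    if (size < b->content_size)
    {
        b->data[size] = '\0';
        b->content_size = size;
    }
}

int main(void)
{
    struct buf b;

    strcpy(b.data, "  block 1\n  ");
    b.content_size = strlen(b.data);
    /* Drop the trailing two-space indent, as vsir_cfg_structure_list_dump() does. */
    buf_truncate(&b, b.content_size - 2);
    printf("[%s]\n", b.data); /* prints "[  block 1\n]" */
    return 0;
}
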
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index 98c311b3655..7503d564af0 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -814,6 +814,8 @@ enum vkd3d_shader_type
VKD3D_SHADER_TYPE_COUNT,
};
+struct vkd3d_shader_message_context;
+
struct vkd3d_shader_version
{
enum vkd3d_shader_type type;
@@ -1088,14 +1090,18 @@ struct vkd3d_shader_tgsm
struct vkd3d_shader_tgsm_raw
{
struct vkd3d_shader_dst_param reg;
+ unsigned int alignment;
unsigned int byte_count;
+ bool zero_init;
};
struct vkd3d_shader_tgsm_structured
{
struct vkd3d_shader_dst_param reg;
+ unsigned int alignment;
unsigned int byte_stride;
unsigned int structure_count;
+ bool zero_init;
};
struct vkd3d_shader_thread_group_size
@@ -1315,6 +1321,8 @@ struct vsir_program
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve);
void vsir_program_cleanup(struct vsir_program *program);
+enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags,
+ const char *source_name, struct vkd3d_shader_message_context *message_context);
static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params(
struct vsir_program *program, unsigned int count)
@@ -1360,6 +1368,12 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse
parser->ops->parser_destroy(parser);
}
+static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser)
+{
+ return vsir_program_validate(&parser->program, parser->config_flags,
+ parser->location.source_name, parser->message_context);
+}
+
struct vkd3d_shader_descriptor_info1
{
enum vkd3d_shader_descriptor_type type;
@@ -1413,6 +1427,7 @@ void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer);
void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list);
void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list);
void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer);
+void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size);
int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f);
int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d);
int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) VKD3D_PRINTF_FUNC(2, 3);
@@ -1522,8 +1537,6 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info,
int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info,
struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
-enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser);
-
static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type(
enum vkd3d_data_type data_type)
{
--
2.43.0