wine-staging/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch
Updated vkd3d-latest patchset (Alistair Leslie-Hughes, commit 77a24c72b8, 2025-04-05 11:52:49 +11:00)

From 86412218c44000e79015064e3efe32a3783dab6b Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Fri, 4 Apr 2025 07:59:53 +1100
Subject: [PATCH] Updated vkd3d to f576ecc9929dd98c900bb8bc0335b91a1a0d3bff.
---
 libs/vkd3d/libs/vkd3d-shader/d3dbc.c          |  32 +-
 libs/vkd3d/libs/vkd3d-shader/fx.c             |   4 +-
 libs/vkd3d/libs/vkd3d-shader/glsl.c           |   3 +-
 libs/vkd3d/libs/vkd3d-shader/hlsl.c           |  38 +
 libs/vkd3d/libs/vkd3d-shader/hlsl.h           |   3 +
 libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c   | 738 ++++++++++++++++--
 libs/vkd3d/libs/vkd3d-shader/ir.c             | 187 ++++-
 libs/vkd3d/libs/vkd3d-shader/msl.c            |   1 +
 libs/vkd3d/libs/vkd3d-shader/spirv.c          |   1 +
 .../libs/vkd3d-shader/vkd3d_shader_main.c     |  12 +-
 .../libs/vkd3d-shader/vkd3d_shader_private.h  |   4 +
 11 files changed, 911 insertions(+), 112 deletions(-)
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index 58e35cf22e8..b49ef9865db 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -1759,27 +1759,40 @@ static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins)
static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg)
{
+ uint32_t offset = reg->reg.idx_count ? reg->reg.idx[0].offset : 0;
+
VKD3D_ASSERT(reg->write_mask);
put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER
| sm1_encode_register_type(&reg->reg)
| (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT)
| (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT)
- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK));
+ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK));
}
static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg)
{
+ uint32_t address_mode = VKD3D_SM1_ADDRESS_MODE_ABSOLUTE, offset = 0;
+
+ if (reg->reg.idx_count)
+ {
+ offset = reg->reg.idx[0].offset;
+ if (reg->reg.idx[0].rel_addr)
+ address_mode = VKD3D_SM1_ADDRESS_MODE_RELATIVE;
+ }
+
put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER
| sm1_encode_register_type(&reg->reg)
+ | (address_mode << VKD3D_SM1_ADDRESS_MODE_SHIFT)
| (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT)
| (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT)
- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK));
+ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK));
}
static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins)
{
const struct vkd3d_shader_version *version = &d3dbc->program->shader_version;
struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer;
+ const struct vkd3d_shader_src_param *src;
const struct vkd3d_sm1_opcode_info *info;
unsigned int i;
uint32_t token;
@@ -1810,13 +1823,10 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v
for (i = 0; i < ins->src_count; ++i)
{
- if (ins->src[i].reg.idx[0].rel_addr)
- {
- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED,
- "Unhandled relative addressing on source register.");
- d3dbc->failed = true;
- }
- write_sm1_src_register(buffer, &ins->src[i]);
+ src = &ins->src[i];
+ write_sm1_src_register(buffer, src);
+ if (src->reg.idx_count && src->reg.idx[0].rel_addr)
+ write_sm1_src_register(buffer, src->reg.idx[0].rel_addr);
}
};
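/* Illustrative sketch, not part of the patch: with these rules, a vs_2_0
 * source operand such as c[a0.x + 4] is encoded as two parameter tokens.
 * The first is the c4 token with VKD3D_SM1_ADDRESS_MODE_RELATIVE set in its
 * address-mode bits; the second, written by the recursive call on rel_addr,
 * encodes a0 with an .x swizzle. Previously any rel_addr on a source
 * register failed with VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED. */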
@@ -1831,6 +1841,7 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3
.reg.type = VKD3DSPR_CONST,
.write_mask = VKD3DSP_WRITEMASK_ALL,
.reg.idx[0].offset = ins->dst[0].reg.idx[0].offset,
+ .reg.idx_count = 1,
};
token = VKD3D_SM1_OP_DEF;
@@ -1863,6 +1874,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc,
reg.reg.type = VKD3DSPR_COMBINED_SAMPLER;
reg.write_mask = VKD3DSP_WRITEMASK_ALL;
reg.reg.idx[0].offset = reg_id;
+ reg.reg.idx_count = 1;
write_sm1_dst_register(buffer, &reg);
}
@@ -1938,6 +1950,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str
case VKD3DSIH_MAX:
case VKD3DSIH_MIN:
case VKD3DSIH_MOV:
+ case VKD3DSIH_MOVA:
case VKD3DSIH_MUL:
case VKD3DSIH_SINCOS:
case VKD3DSIH_SLT:
@@ -1982,6 +1995,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
uint32_t token, usage_idx;
bool ret;
+ reg.reg.idx_count = 1;
if (sm1_register_from_semantic_name(version, element->semantic_name,
element->semantic_index, output, &reg.reg.type, &reg.reg.idx[0].offset))
{
diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
index 7a226c1c870..debcb261811 100644
--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
@@ -2366,6 +2366,7 @@ static inline bool is_object_fx_type(enum state_property_component_type type)
case FX_BLEND:
case FX_VERTEXSHADER:
case FX_PIXELSHADER:
+ case FX_GEOMETRYSHADER:
return true;
default:
return false;
@@ -2761,7 +2762,8 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl
struct hlsl_ir_constant *c = hlsl_ir_constant(node);
struct hlsl_type *data_type = c->node.data_type;
- if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT)
+ if (data_type->class == HLSL_CLASS_SCALAR
+ && (data_type->e.numeric.type == HLSL_TYPE_INT || data_type->e.numeric.type == HLSL_TYPE_UINT))
{
if (c->value.u[0].u != 0)
hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
index 828a94d77ab..a87ade5e467 100644
--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
@@ -1296,7 +1296,7 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
"Internal compiler error: Unhandled SV_POSITION index %u.", idx);
if (version->type == VKD3D_SHADER_TYPE_PIXEL)
- vkd3d_string_buffer_printf(buffer, "gl_FragCoord");
+ vkd3d_string_buffer_printf(buffer, "vec4(gl_FragCoord.xyz, 1.0 / gl_FragCoord.w)");
else
vkd3d_string_buffer_printf(buffer, "gl_Position");
break;
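/* Assumed rationale, not stated in the patch: GLSL defines gl_FragCoord.w as
 * 1/w_clip, whereas a Direct3D pixel shader reads SV_POSITION.w as w_clip
 * itself, so taking the reciprocal here restores the value the translated
 * shader expects. */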
@@ -2468,6 +2468,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags,
return ret;
VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
+ VKD3D_ASSERT(program->has_descriptor_info);
vkd3d_glsl_generator_init(&generator, program, compile_info,
combined_sampler_info, message_context);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 01586592b25..d1d20b7384c 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -1588,6 +1588,43 @@ void hlsl_block_add_simple_store(struct hlsl_ctx *ctx, struct hlsl_block *block,
hlsl_block_add_store_index(ctx, block, &lhs_deref, NULL, rhs, 0, &rhs->loc);
}
+static struct hlsl_ir_node *hlsl_new_store_parent(struct hlsl_ctx *ctx,
+ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs,
+ unsigned int writemask, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_ir_store *store;
+
+ VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs));
+ VKD3D_ASSERT(lhs->path_len >= path_len);
+
+ if (!(store = hlsl_alloc(ctx, sizeof(*store))))
+ return NULL;
+ init_node(&store->node, HLSL_IR_STORE, NULL, loc);
+
+ if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, path_len))
+ {
+ vkd3d_free(store);
+ return NULL;
+ }
+ for (unsigned int i = 0; i < path_len; ++i)
+ hlsl_src_from_node(&store->lhs.path[i], lhs->path[i].node);
+
+ hlsl_src_from_node(&store->rhs, rhs);
+
+ if (!writemask && type_is_single_reg(rhs->data_type))
+ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1;
+ store->writemask = writemask;
+
+ return &store->node;
+}
+
+void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs,
+ unsigned int writemask, const struct vkd3d_shader_location *loc)
+{
+ append_new_instr(ctx, block, hlsl_new_store_parent(ctx, lhs, path_len, rhs, writemask, loc));
+}
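/* Usage sketch, assuming the semantics above: for a deref whose full path is
 * var[i].x (path_len 2), passing path_len 1 stores to the enclosing vector
 * var[i] instead, with the written component chosen by the writemask rather
 * than by the trailing path entry. This is what lets vectorize_stores()
 * replace several single-component stores with one vector store. */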
+
void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block,
const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs)
{
@@ -4957,6 +4994,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d
if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY
|| target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT
+ || target_type == VKD3D_SHADER_TARGET_GLSL
|| target_type == VKD3D_SHADER_TARGET_D3D_ASM)
{
uint64_t config_flags = vkd3d_shader_init_config_flags();
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index 1d78c5622de..fafa5740963 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -1565,6 +1565,9 @@ void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *blo
void hlsl_block_add_store_index(struct hlsl_ctx *ctx, struct hlsl_block *block,
const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs,
unsigned int writemask, const struct vkd3d_shader_location *loc);
+void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs,
+ unsigned int writemask, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s,
unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block,
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 8fcf6e6ac54..ba56ba90403 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -1916,12 +1916,6 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx,
if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count))
return false;
- if (hlsl_version_lt(ctx, 4, 0))
- {
- TRACE("Non-constant index propagation is not yet supported for SM1.\n");
- return false;
- }
-
VKD3D_ASSERT(count);
hlsl_block_init(&block);
@@ -1950,6 +1944,12 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx,
else if (x != idx->src.var)
goto done;
+ if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type != VKD3D_SHADER_TYPE_VERTEX)
+ {
+ TRACE("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name);
+ goto done;
+ }
+
if (i == 0)
{
path_len = idx->src.path_len;
@@ -2184,6 +2184,9 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx,
return false;
VKD3D_ASSERT(value->component == 0);
+ /* A uniform object should never have been written to. */
+ VKD3D_ASSERT(!deref->var->is_uniform);
+
/* Only HLSL_IR_LOAD can produce an object. */
load = hlsl_ir_load(value->node);
@@ -2488,6 +2491,554 @@ enum validation_result
DEREF_VALIDATION_NOT_CONSTANT,
};
+struct vectorize_exprs_state
+{
+ struct vectorizable_exprs_group
+ {
+ struct hlsl_block *block;
+ struct hlsl_ir_expr *exprs[4];
+ uint8_t expr_count, component_count;
+ } *groups;
+ size_t count, capacity;
+};
+
+static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b)
+{
+ /* TODO: We can also vectorize different constants. */
+
+ if (a->type == HLSL_IR_SWIZZLE)
+ a = hlsl_ir_swizzle(a)->val.node;
+ if (b->type == HLSL_IR_SWIZZLE)
+ b = hlsl_ir_swizzle(b)->val.node;
+
+ return a == b;
+}
+
+static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b)
+{
+ if (a->op != b->op)
+ return false;
+
+ for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j)
+ {
+ if (!a->operands[j].node)
+ break;
+ if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node))
+ return false;
+ }
+
+ return true;
+}
+
+static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state)
+{
+ if (expr->node.data_type->class > HLSL_CLASS_VECTOR)
+ return;
+
+ /* These are the only current ops that are not per-component. */
+ if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED
+ || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD)
+ return;
+
+ for (size_t i = 0; i < state->count; ++i)
+ {
+ struct vectorizable_exprs_group *group = &state->groups[i];
+ struct hlsl_ir_expr *other = group->exprs[0];
+
+ /* These are SSA instructions, which means they have the same value
+ * regardless of what block they're in. However, being in different
+ * blocks may mean that one expression or the other is not always
+ * executed. */
+
+ if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4
+ && group->block == block
+ && is_same_vectorizable_expr(expr, other))
+ {
+ group->exprs[group->expr_count++] = expr;
+ group->component_count += expr->node.data_type->e.numeric.dimx;
+ return;
+ }
+ }
+
+ if (!hlsl_array_reserve(ctx, (void **)&state->groups,
+ &state->capacity, state->count + 1, sizeof(*state->groups)))
+ return;
+ state->groups[state->count].block = block;
+ state->groups[state->count].exprs[0] = expr;
+ state->groups[state->count].expr_count = 1;
+ state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx;
+ ++state->count;
+}
+
+static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct vectorize_exprs_state *state)
+{
+ struct hlsl_ir_node *instr;
+
+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
+ {
+ if (instr->type == HLSL_IR_EXPR)
+ {
+ record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state);
+ }
+ else if (instr->type == HLSL_IR_IF)
+ {
+ struct hlsl_ir_if *iff = hlsl_ir_if(instr);
+
+ find_vectorizable_expr_groups(ctx, &iff->then_block, state);
+ find_vectorizable_expr_groups(ctx, &iff->else_block, state);
+ }
+ else if (instr->type == HLSL_IR_LOOP)
+ {
+ find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state);
+ }
+ else if (instr->type == HLSL_IR_SWITCH)
+ {
+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
+ struct hlsl_ir_switch_case *c;
+
+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
+ find_vectorizable_expr_groups(ctx, &c->body, state);
+ }
+ }
+}
+
+/* Combine sequences like
+ *
+ * 3: @1.x
+ * 4: @2.x
+ * 5: @3 * @4
+ * 6: @1.y
+ * 7: @2.x
+ * 8: @6 * @7
+ *
+ * into
+ *
+ * 5_1: @1.xy
+ * 5_2: @2.xx
+ * 5_3: @5_1 * @5_2
+ * 5: @5_3.x
+ * 8: @5_3.y
+ *
+ * Each operand to an expression needs to refer to the same ultimate source
+ * (in this case @1 and @2 respectively), but can be a swizzle thereof.
+ *
+ * In practice the swizzles @5 and @8 can generally then be vectorized again,
+ * either as part of another expression, or as part of a store.
+ */
+static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block)
+{
+ struct vectorize_exprs_state state = {0};
+ bool progress = false;
+
+ find_vectorizable_expr_groups(ctx, block, &state);
+
+ for (unsigned int i = 0; i < state.count; ++i)
+ {
+ struct vectorizable_exprs_group *group = &state.groups[i];
+ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
+ uint32_t swizzles[HLSL_MAX_OPERANDS] = {0};
+ struct hlsl_ir_node *arg, *combined;
+ unsigned int component_count = 0;
+ struct hlsl_type *combined_type;
+ struct hlsl_block new_block;
+ struct hlsl_ir_expr *expr;
+
+ if (group->expr_count == 1)
+ continue;
+
+ hlsl_block_init(&new_block);
+
+ for (unsigned int j = 0; j < group->expr_count; ++j)
+ {
+ expr = group->exprs[j];
+
+ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
+ {
+ uint32_t arg_swizzle;
+
+ if (!(arg = expr->operands[a].node))
+ break;
+
+ if (arg->type == HLSL_IR_SWIZZLE)
+ arg_swizzle = hlsl_ir_swizzle(arg)->u.vector;
+ else
+ arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W);
+
+ /* Mask out the invalid components. */
+ arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1;
+ swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count);
+ }
+
+ component_count += expr->node.data_type->e.numeric.dimx;
+ }
+
+ expr = group->exprs[0];
+ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
+ {
+ if (!(arg = expr->operands[a].node))
+ break;
+ if (arg->type == HLSL_IR_SWIZZLE)
+ arg = hlsl_ir_swizzle(arg)->val.node;
+ args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc);
+ }
+
+ combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count);
+ combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc);
+
+ list_move_before(&expr->node.entry, &new_block.instrs);
+
+ TRACE("Combining %u %s instructions into %p.\n", group->expr_count,
+ debug_hlsl_expr_op(group->exprs[0]->op), combined);
+
+ component_count = 0;
+ for (unsigned int j = 0; j < group->expr_count; ++j)
+ {
+ struct hlsl_ir_node *replacement;
+
+ expr = group->exprs[j];
+
+ if (!(replacement = hlsl_new_swizzle(ctx,
+ HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count),
+ expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc)))
+ goto out;
+ component_count += expr->node.data_type->e.numeric.dimx;
+ list_add_before(&expr->node.entry, &replacement->entry);
+ hlsl_replace_node(&expr->node, replacement);
+ }
+
+ progress = true;
+ }
+
+out:
+ vkd3d_free(state.groups);
+ return progress;
+}
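/* Editorial sketch of the net effect on SM1 output, assuming the stores are
 * vectorized as well: two scalar multiplies such as
 *
 *     mul r0.x, r1.x, r2.x
 *     mul r0.y, r1.y, r2.x
 *
 * collapse into a single vector instruction:
 *
 *     mul r0.xy, r1.xyyy, r2.xxxx
 */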
+
+struct vectorize_stores_state
+{
+ struct vectorizable_stores_group
+ {
+ struct hlsl_block *block;
+ /* We handle overlapping stores, because it's not really easier not to.
+ * In theory, then, we could collect an arbitrary number of stores here.
+ *
+ * In practice, overlapping stores are unlikely, and of course at most
+ * 4 stores can appear without overlap. Therefore, for simplicity, we
+ * just use a fixed array of 4.
+ *
+ * Since computing the writemask requires traversing the deref, and we
+ * need to do that anyway, we store it here for convenience. */
+ struct hlsl_ir_store *stores[4];
+ unsigned int path_len;
+ uint8_t writemasks[4];
+ uint8_t store_count;
+ bool dirty;
+ } *groups;
+ size_t count, capacity;
+};
+
+/* This must be a store to a subsection of a vector.
+ * In theory we can also vectorize stores to packed struct fields,
+ * but this requires target-specific knowledge and is probably best left
+ * to a VSIR pass. */
+static bool can_vectorize_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
+ unsigned int *path_len, uint8_t *writemask)
+{
+ struct hlsl_type *type = store->lhs.var->data_type;
+ unsigned int i;
+
+ if (store->rhs.node->data_type->class > HLSL_CLASS_VECTOR)
+ return false;
+
+ if (type->class == HLSL_CLASS_SCALAR)
+ return false;
+
+ for (i = 0; type->class != HLSL_CLASS_VECTOR && i < store->lhs.path_len; ++i)
+ type = hlsl_get_element_type_from_path_index(ctx, type, store->lhs.path[i].node);
+
+ if (type->class != HLSL_CLASS_VECTOR)
+ return false;
+
+ *path_len = i;
+
+ if (i < store->lhs.path_len)
+ {
+ struct hlsl_ir_constant *c;
+
+ /* This is a store to a scalar component of a vector, achieved via
+ * indexing. */
+
+ if (store->lhs.path[i].node->type != HLSL_IR_CONSTANT)
+ return false;
+ c = hlsl_ir_constant(store->lhs.path[i].node);
+ *writemask = (1u << c->value.u[0].u);
+ }
+ else
+ {
+ *writemask = store->writemask;
+ }
+
+ return true;
+}
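/* Example of the indexed case above: given "float4 v; ... v[2] = x;", the
 * path ends in the constant 2 applied to a vector, so *writemask becomes
 * 1u << 2, i.e. the store is treated exactly like "v.z = x". */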
+
+static bool derefs_are_same_vector(struct hlsl_ctx *ctx, const struct hlsl_deref *a, const struct hlsl_deref *b)
+{
+ struct hlsl_type *type = a->var->data_type;
+
+ if (a->var != b->var)
+ return false;
+
+ for (unsigned int i = 0; type->class != HLSL_CLASS_VECTOR && i < a->path_len && i < b->path_len; ++i)
+ {
+ if (a->path[i].node != b->path[i].node)
+ return false;
+ type = hlsl_get_element_type_from_path_index(ctx, type, a->path[i].node);
+ }
+
+ return true;
+}
+
+static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct hlsl_ir_store *store, struct vectorize_stores_state *state)
+{
+ unsigned int path_len;
+ uint8_t writemask;
+
+ if (!can_vectorize_store(ctx, store, &path_len, &writemask))
+ {
+ /* In the case of a dynamically indexed vector, we must invalidate
+ * any groups that statically index the same vector.
+ * For the sake of expediency, we go one step further and invalidate
+ * any groups that store to the same variable.
+ * (We also don't check that that was the reason why this store isn't
+ * vectorizable.)
+ * We could be more granular, but we'll defer that until it comes
+ * up in practice. */
+ for (size_t i = 0; i < state->count; ++i)
+ {
+ if (state->groups[i].stores[0]->lhs.var == store->lhs.var)
+ state->groups[i].dirty = true;
+ }
+ return;
+ }
+
+ for (size_t i = 0; i < state->count; ++i)
+ {
+ struct vectorizable_stores_group *group = &state->groups[i];
+ struct hlsl_ir_store *other = group->stores[0];
+
+ if (group->dirty)
+ continue;
+
+ if (derefs_are_same_vector(ctx, &store->lhs, &other->lhs))
+ {
+ /* Stores must be in the same CFG block. If they're not,
+ * they're not executed in exactly the same flow, and
+ * therefore can't be vectorized. */
+ if (group->block == block
+ && is_same_vectorizable_source(store->rhs.node, other->rhs.node))
+ {
+ if (group->store_count < ARRAY_SIZE(group->stores))
+ {
+ group->stores[group->store_count] = store;
+ group->writemasks[group->store_count] = writemask;
+ ++group->store_count;
+ return;
+ }
+ }
+ else
+ {
+ /* A store to the same vector with a different source, or in
+ * a different CFG block, invalidates any earlier store.
+ *
+ * A store to a component which *contains* the vector in
+ * question would also invalidate, but we should have split all
+ * of those by the time we get here. */
+ group->dirty = true;
+
+ /* Note that we do exit this loop early if we find a store A we
+ * can vectorize with, but that's fine. If there was a store B
+ * also in the state that we can't vectorize with, it would
+ * already have invalidated A. */
+ }
+ }
+ else
+ {
+ /* This could still be a store to the same vector, if e.g. the
+ * vector is part of a dynamically indexed array, or the path has
+ * two equivalent instructions which refer to the same component.
+ * [CSE may help with the latter, but we don't have it yet,
+ * and we shouldn't depend on it anyway.]
+ * For the sake of expediency, we just invalidate it if it refers
+ * to the same variable at all.
+ * As above, we could be more granular, but we'll defer that until
+ * it comes up in practice. */
+ if (store->lhs.var == other->lhs.var)
+ group->dirty = true;
+
+ /* As above, we don't need to worry about exiting the loop early. */
+ }
+ }
+
+ if (!hlsl_array_reserve(ctx, (void **)&state->groups,
+ &state->capacity, state->count + 1, sizeof(*state->groups)))
+ return;
+ state->groups[state->count].block = block;
+ state->groups[state->count].stores[0] = store;
+ state->groups[state->count].path_len = path_len;
+ state->groups[state->count].writemasks[0] = writemask;
+ state->groups[state->count].store_count = 1;
+ state->groups[state->count].dirty = false;
+ ++state->count;
+}
+
+static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
+ struct vectorize_stores_state *state)
+{
+ struct hlsl_ir_node *instr;
+
+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
+ {
+ if (instr->type == HLSL_IR_STORE)
+ {
+ record_vectorizable_store(ctx, block, hlsl_ir_store(instr), state);
+ }
+ else if (instr->type == HLSL_IR_LOAD)
+ {
+ struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var;
+
+ /* By vectorizing store A with store B, we are effectively moving
+ * store A down to happen at the same time as store B.
+ * If there was a load of the same variable between the two, this
+ * would be incorrect.
+ * Therefore invalidate all stores to this variable. As above, we
+ * could be more granular if necessary. */
+
+ for (unsigned int i = 0; i < state->count; ++i)
+ {
+ if (state->groups[i].stores[0]->lhs.var == var)
+ state->groups[i].dirty = true;
+ }
+ }
+ else if (instr->type == HLSL_IR_IF)
+ {
+ struct hlsl_ir_if *iff = hlsl_ir_if(instr);
+
+ find_vectorizable_store_groups(ctx, &iff->then_block, state);
+ find_vectorizable_store_groups(ctx, &iff->else_block, state);
+ }
+ else if (instr->type == HLSL_IR_LOOP)
+ {
+ find_vectorizable_store_groups(ctx, &hlsl_ir_loop(instr)->body, state);
+ }
+ else if (instr->type == HLSL_IR_SWITCH)
+ {
+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
+ struct hlsl_ir_switch_case *c;
+
+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
+ find_vectorizable_store_groups(ctx, &c->body, state);
+ }
+ }
+}
+
+/* Combine sequences like
+ *
+ * 2: @1.yw
+ * 3: @1.zy
+ * 4: var.xy = @2
+ * 5: var.yw = @3
+ *
+ * to
+ *
+ * 2: @1.yzy
+ * 5: var.xyw = @2
+ *
+ * There are a lot of gotchas here. We need to make sure the two stores are to
+ * the same vector (which may be embedded in a complex variable), that they're
+ * always executed in the same control flow, and that there aren't any other
+ * stores or loads on the same vector in the middle. */
+static bool vectorize_stores(struct hlsl_ctx *ctx, struct hlsl_block *block)
+{
+ struct vectorize_stores_state state = {0};
+ bool progress = false;
+
+ find_vectorizable_store_groups(ctx, block, &state);
+
+ for (unsigned int i = 0; i < state.count; ++i)
+ {
+ struct vectorizable_stores_group *group = &state.groups[i];
+ uint32_t new_swizzle = 0, new_writemask = 0;
+ struct hlsl_ir_node *new_rhs, *value;
+ uint32_t swizzle_components[4];
+ unsigned int component_count;
+ struct hlsl_ir_store *store;
+ struct hlsl_block new_block;
+
+ if (group->store_count == 1)
+ continue;
+
+ hlsl_block_init(&new_block);
+
+ /* Compute the swizzle components. */
+ for (unsigned int j = 0; j < group->store_count; ++j)
+ {
+ unsigned int writemask = group->writemasks[j];
+ uint32_t rhs_swizzle;
+
+ store = group->stores[j];
+
+ if (store->rhs.node->type == HLSL_IR_SWIZZLE)
+ rhs_swizzle = hlsl_ir_swizzle(store->rhs.node)->u.vector;
+ else
+ rhs_swizzle = HLSL_SWIZZLE(X, Y, Z, W);
+
+ component_count = 0;
+ for (unsigned int k = 0; k < 4; ++k)
+ {
+ if (writemask & (1u << k))
+ swizzle_components[k] = hlsl_swizzle_get_component(rhs_swizzle, component_count++);
+ }
+
+ new_writemask |= writemask;
+ }
+
+ /* Construct the new swizzle. */
+ component_count = 0;
+ for (unsigned int k = 0; k < 4; ++k)
+ {
+ if (new_writemask & (1u << k))
+ hlsl_swizzle_set_component(&new_swizzle, component_count++, swizzle_components[k]);
+ }
+
+ store = group->stores[0];
+ value = store->rhs.node;
+ if (value->type == HLSL_IR_SWIZZLE)
+ value = hlsl_ir_swizzle(value)->val.node;
+
+ new_rhs = hlsl_block_add_swizzle(ctx, &new_block, new_swizzle, component_count, value, &value->loc);
+ hlsl_block_add_store_parent(ctx, &new_block, &store->lhs,
+ group->path_len, new_rhs, new_writemask, &store->node.loc);
+
+ TRACE("Combining %u stores to %s.\n", group->store_count, store->lhs.var->name);
+
+ list_move_before(&group->stores[group->store_count - 1]->node.entry, &new_block.instrs);
+
+ for (unsigned int j = 0; j < group->store_count; ++j)
+ {
+ list_remove(&group->stores[j]->node.entry);
+ hlsl_free_instr(&group->stores[j]->node);
+ }
+
+ progress = true;
+ }
+
+ vkd3d_free(state.groups);
+ return progress;
+}
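/* Worked through on the example in the comment above (editorial sketch):
 * store 4 has writemask .xy with rhs swizzle yw; store 5 has .yw with zy.
 * The later store wins overlapping components, so swizzle_components ends up
 * {y, z, -, y}, new_writemask is .xyw, and the rebuilt swizzle reads .yzy,
 * giving "var.xyw = @1.yzy" as shown. */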
+
static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx,
const struct hlsl_deref *deref)
{
@@ -3123,6 +3674,11 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc
return false;
}
+static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
+{
+ return ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && deref->var->is_uniform;
+}
+
/* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant
* index into multiple constant loads, where the value of only one of them ends up in the resulting
* node.
@@ -3149,6 +3705,9 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n
if (deref->path_len == 0)
return false;
+ if (deref_supports_sm1_indirect_addressing(ctx, deref))
+ return false;
+
for (i = deref->path_len - 1; ; --i)
{
if (deref->path[i].node->type != HLSL_IR_CONSTANT)
@@ -7839,7 +8398,8 @@ static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, st
if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref))
return false;
- src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
+ if (src_param->reg.dimension != VSIR_DIMENSION_NONE)
+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
return true;
}
@@ -7869,7 +8429,6 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
struct vsir_program *program, struct hlsl_ir_constant *constant)
{
struct hlsl_ir_node *instr = &constant->node;
- struct vkd3d_shader_dst_param *dst_param;
struct vkd3d_shader_src_param *src_param;
struct vkd3d_shader_instruction *ins;
@@ -7881,13 +8440,11 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
src_param = &ins->src[0];
vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
src_param->reg.idx[0].offset = constant->reg.id;
src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask);
- dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- dst_param->reg.idx[0].offset = instr->reg.id;
- dst_param->write_mask = instr->reg.writemask;
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
}
static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx,
@@ -7974,11 +8531,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx
dst_param = &ins->dst[0];
vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
dst_param->reg.idx[0].offset = instr->reg.id;
+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
dst_param->write_mask = 1u << i;
src_param = &ins->src[0];
vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
src_param->reg.idx[0].offset = operand->reg.id;
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
c = vsir_swizzle_get_component(src_swizzle, i);
src_param->swizzle = vsir_swizzle_from_writemask(1u << c);
}
@@ -7990,7 +8549,6 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi
{
struct hlsl_ir_node *operand = expr->operands[0].node;
struct hlsl_ir_node *instr = &expr->node;
- struct vkd3d_shader_dst_param *dst_param;
struct vkd3d_shader_src_param *src_param;
struct vkd3d_shader_instruction *ins;
unsigned int src_count = 0;
@@ -8001,25 +8559,20 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count)))
return;
- dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- dst_param->reg.idx[0].offset = instr->reg.id;
- dst_param->write_mask = instr->reg.writemask;
-
- src_param = &ins->src[0];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = operand->reg.id;
- src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, VKD3DSP_WRITEMASK_ALL);
if (ctx->profile->major_version < 3)
{
src_param = &ins->src[1];
vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id;
src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
src_param = &ins->src[2];
vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id;
src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
}
@@ -8341,19 +8894,68 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
else
VKD3D_ASSERT(reg.allocated);
- vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
+ if (type == VKD3DSPR_DEPTHOUT)
+ {
+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0);
+ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR;
+ }
+ else
+ {
+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
+ dst_param->reg.idx[0].offset = register_index;
+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
+ }
dst_param->write_mask = writemask;
- dst_param->reg.idx[0].offset = register_index;
if (deref->rel_offset.node)
hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir.");
}
+static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_node *instr)
+{
+ enum vkd3d_shader_opcode opcode = hlsl_version_ge(ctx, 2, 0) ? VKD3DSIH_MOVA : VKD3DSIH_MOV;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_instruction *ins;
+
+ VKD3D_ASSERT(instr->reg.allocated);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1)))
+ return;
+
+ dst_param = &ins->dst[0];
+ vsir_register_init(&dst_param->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0);
+ dst_param->write_mask = VKD3DSP_WRITEMASK_0;
+
+ VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR);
+ VKD3D_ASSERT(instr->data_type->e.numeric.dimx == 1);
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, instr, VKD3DSP_WRITEMASK_ALL);
+}
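/* Sketch of the resulting instruction, assuming SM2+ vs SM1 profiles: for
 * SM2+ this emits "mova a0.x, <index>", which rounds to nearest; SM1 has no
 * mova, so a plain mov to a0 is used instead, with its truncating semantics.
 * The vsir_program_normalize_addr() pass in ir.c later rewrites both forms
 * for targets without an address register. */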
+
+static struct vkd3d_shader_src_param *sm1_generate_vsir_new_address_src(struct hlsl_ctx *ctx,
+ struct vsir_program *program)
+{
+ struct vkd3d_shader_src_param *idx_src;
+
+ if (!(idx_src = vsir_program_get_src_params(program, 1)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return NULL;
+ }
+
+ memset(idx_src, 0, sizeof(*idx_src));
+ vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0);
+ idx_src->reg.dimension = VSIR_DIMENSION_VEC4;
+ idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+ return idx_src;
+}
+
static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
- struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref,
- unsigned int dst_writemask, const struct vkd3d_shader_location *loc)
+ struct vsir_program *program, struct vkd3d_shader_src_param *src_param,
+ struct hlsl_deref *deref, uint32_t dst_writemask, const struct vkd3d_shader_location *loc)
{
enum vkd3d_shader_register_type type = VKD3DSPR_TEMP;
+ struct vkd3d_shader_src_param *src_rel_addr = NULL;
struct vkd3d_shader_version version;
uint32_t register_index;
unsigned int writemask;
@@ -8371,12 +8973,26 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
}
else if (deref->var->is_uniform)
{
+ unsigned int offset = deref->const_offset;
+
type = VKD3DSPR_CONST;
+ register_index = deref->var->regs[HLSL_REGSET_NUMERIC].id + offset / 4;
- reg = hlsl_reg_from_deref(ctx, deref);
- register_index = reg.id;
- writemask = reg.writemask;
- VKD3D_ASSERT(reg.allocated);
+ writemask = 0xf & (0xf << (offset % 4));
+ if (deref->var->regs[HLSL_REGSET_NUMERIC].writemask)
+ writemask = hlsl_combine_writemasks(deref->var->regs[HLSL_REGSET_NUMERIC].writemask, writemask);
+
+ if (deref->rel_offset.node)
+ {
+ VKD3D_ASSERT(deref_supports_sm1_indirect_addressing(ctx, deref));
+
+ if (!(src_rel_addr = sm1_generate_vsir_new_address_src(ctx, program)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+ }
+ VKD3D_ASSERT(deref->var->regs[HLSL_REGSET_NUMERIC].allocated);
}
else if (deref->var->is_input_semantic)
{
@@ -8408,32 +9024,30 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
}
vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
src_param->reg.idx[0].offset = register_index;
+ src_param->reg.idx[0].rel_addr = src_rel_addr;
src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
-
- if (deref->rel_offset.node)
- hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir.");
}
static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program,
struct hlsl_ir_load *load)
{
struct hlsl_ir_node *instr = &load->node;
- struct vkd3d_shader_dst_param *dst_param;
struct vkd3d_shader_instruction *ins;
VKD3D_ASSERT(instr->reg.allocated);
+ if (load->src.rel_offset.node)
+ sm1_generate_vsir_instr_mova(ctx, program, load->src.rel_offset.node);
+
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
return;
- dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- dst_param->reg.idx[0].offset = instr->reg.id;
- dst_param->write_mask = instr->reg.writemask;
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask,
- &ins->location);
+ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0],
+ &load->src, ins->dst[0].write_mask, &ins->location);
}
static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
@@ -8443,7 +9057,6 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
struct hlsl_ir_node *ddx = load->ddx.node;
struct hlsl_ir_node *ddy = load->ddy.node;
struct hlsl_ir_node *instr = &load->node;
- struct vkd3d_shader_dst_param *dst_param;
struct vkd3d_shader_src_param *src_param;
struct vkd3d_shader_instruction *ins;
enum vkd3d_shader_opcode opcode;
@@ -8482,15 +9095,12 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
return;
ins->flags = flags;
- dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- dst_param->reg.idx[0].offset = instr->reg.id;
- dst_param->write_mask = instr->reg.writemask;
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
src_param = &ins->src[0];
vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL);
- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource,
+ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], &load->resource,
VKD3DSP_WRITEMASK_ALL, &ins->location);
if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
@@ -8507,7 +9117,6 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr)
{
struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node;
- struct vkd3d_shader_dst_param *dst_param;
struct vkd3d_shader_src_param *src_param;
struct vkd3d_shader_instruction *ins;
uint32_t swizzle;
@@ -8517,11 +9126,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
return;
- dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
- dst_param->reg.idx[0].offset = instr->reg.id;
- dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
- dst_param->write_mask = instr->reg.writemask;
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
swizzle = hlsl_swizzle_from_writemask(val->reg.writemask);
swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx);
@@ -8557,7 +9162,6 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
{
struct hlsl_ir_node *condition = jump->condition.node;
struct hlsl_ir_node *instr = &jump->node;
- struct vkd3d_shader_dst_param *dst_param;
struct vkd3d_shader_instruction *ins;
if (jump->type == HLSL_IR_JUMP_DISCARD_NEG)
@@ -8565,10 +9169,7 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0)))
return;
- dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- dst_param->reg.idx[0].offset = condition->reg.id;
- dst_param->write_mask = condition->reg.writemask;
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, condition);
}
else
{
@@ -8689,6 +9290,10 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
return;
}
+ program->temp_count = allocate_temp_registers(ctx, entry_func);
+ if (ctx->result)
+ return;
+
generate_vsir_signature(ctx, program, entry_func);
hlsl_block_init(&block);
@@ -12532,6 +13137,7 @@ static void process_entry_function(struct hlsl_ctx *ctx,
struct recursive_call_ctx recursive_call_ctx;
struct hlsl_ir_var *var;
unsigned int i;
+ bool progress;
ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func;
@@ -12709,6 +13315,9 @@ static void process_entry_function(struct hlsl_ctx *ctx,
hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL);
}
+ compute_liveness(ctx, entry_func);
+ transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body);
+
loop_unrolling_execute(ctx, body);
hlsl_run_const_passes(ctx, body);
@@ -12719,13 +13328,21 @@ static void process_entry_function(struct hlsl_ctx *ctx,
lower_ir(ctx, lower_casts_to_bool, body);
lower_ir(ctx, lower_int_dot, body);
- compute_liveness(ctx, entry_func);
- transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body);
-
if (hlsl_version_lt(ctx, 4, 0))
hlsl_transform_ir(ctx, lower_separate_samples, body, NULL);
hlsl_transform_ir(ctx, validate_dereferences, body, NULL);
+
+ do
+ {
+ progress = vectorize_exprs(ctx, body);
+ compute_liveness(ctx, entry_func);
+ progress |= hlsl_transform_ir(ctx, dce, body, NULL);
+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
+ progress |= vectorize_stores(ctx, body);
+ } while (progress);
+
hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL);
if (hlsl_version_ge(ctx, 4, 0))
@@ -12847,7 +13464,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
if (profile->major_version < 4)
{
mark_indexable_vars(ctx, entry_func);
- allocate_temp_registers(ctx, entry_func);
allocate_const_registers(ctx, entry_func);
sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS);
allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 3a784c71388..72cf53761e4 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -470,6 +470,80 @@ static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_d
return false;
}
+static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ struct vkd3d_shader_instruction *ins, *ins2;
+ unsigned int tmp_idx = ~0u;
+ unsigned int i, k, r;
+
+ for (i = 0; i < program->instructions.count; ++i)
+ {
+ ins = &program->instructions.elements[i];
+
+ if (ins->opcode == VKD3DSIH_MOV && ins->dst[0].reg.type == VKD3DSPR_ADDR)
+ {
+ if (tmp_idx == ~0u)
+ tmp_idx = program->temp_count++;
+
+ ins->opcode = VKD3DSIH_FTOU;
+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
+ ins->dst[0].reg.idx[0].offset = tmp_idx;
+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ }
+ else if (ins->opcode == VKD3DSIH_MOVA)
+ {
+ if (tmp_idx == ~0u)
+ tmp_idx = program->temp_count++;
+
+ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ ins = &program->instructions.elements[i];
+ ins2 = &program->instructions.elements[i + 1];
+
+ ins->opcode = VKD3DSIH_ROUND_NE;
+ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ ins->dst[0].reg.idx[0].offset = tmp_idx;
+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+
+ if (!vsir_instruction_init_with_params(program, ins2, &ins->location, VKD3DSIH_FTOU, 1, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ vsir_register_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
+ ins2->dst[0].reg.idx[0].offset = tmp_idx;
+ ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins2->dst[0].write_mask = ins->dst[0].write_mask;
+
+ vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ ins2->src[0].reg.idx[0].offset = tmp_idx;
+ ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins2->src[0].swizzle = vsir_swizzle_from_writemask(ins2->dst[0].write_mask);
+ }
+
+ for (k = 0; k < ins->src_count; ++k)
+ {
+ struct vkd3d_shader_src_param *src = &ins->src[k];
+
+ for (r = 0; r < src->reg.idx_count; ++r)
+ {
+ struct vkd3d_shader_src_param *rel = src->reg.idx[r].rel_addr;
+
+ if (rel && rel->reg.type == VKD3DSPR_ADDR)
+ {
+ if (tmp_idx == ~0u)
+ tmp_idx = program->temp_count++;
+
+ vsir_register_init(&rel->reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
+ rel->reg.idx[0].offset = tmp_idx;
+ rel->reg.dimension = VSIR_DIMENSION_VEC4;
+ }
+ }
+ }
+ }
+
+ return VKD3D_OK;
+}
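/* Illustrative lowering performed above, assuming one shared temp r<t>:
 *
 *     mova a0.x, r0.x              round_ne r<t>.x, r0.x
 *     mov  r1, c[a0.x + 4]   ->    ftou     r<t>.x, r<t>.x
 *                                  mov      r1, c[r<t>.x + 4]
 *
 * A plain mov to a0 becomes a single ftou, since it never rounded. */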
+
static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program,
struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx,
struct vkd3d_shader_message_context *message_context)
@@ -482,6 +556,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program,
if (!shader_instruction_array_insert_at(instructions, pos + 1, 2))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ifc = &instructions->elements[pos];
if (*tmp_idx == ~0u)
*tmp_idx = program->temp_count++;
@@ -535,6 +610,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program
if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ texkill = &instructions->elements[pos];
if (*tmp_idx == ~0u)
*tmp_idx = program->temp_count++;
@@ -621,6 +697,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro
if (!shader_instruction_array_insert_at(instructions, pos + 1, 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ mad = &instructions->elements[pos];
if (*tmp_idx == ~0u)
*tmp_idx = program->temp_count++;
@@ -665,6 +742,7 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog
if (!shader_instruction_array_insert_at(instructions, pos + 1, 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ sincos = &instructions->elements[pos];
ins = &instructions->elements[pos + 1];
@@ -717,6 +795,7 @@ static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program,
if (!shader_instruction_array_insert_at(instructions, pos + 1, 2))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ tex = &instructions->elements[pos];
if (*tmp_idx == ~0u)
*tmp_idx = program->temp_count++;
@@ -1128,6 +1207,7 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra
if (!shader_instruction_array_insert_at(&program->instructions, i, 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
ins = &program->instructions.elements[i];
+
vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1);
ins->dst[0].reg.idx[0].offset = 0;
@@ -1346,7 +1426,6 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
loc = ins->location;
if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count))
return VKD3D_ERROR_OUT_OF_MEMORY;
-
ins = &program->instructions.elements[i];
for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j)
@@ -2426,7 +2505,8 @@ struct flat_constants_normaliser
};
static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg,
- enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index)
+ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index,
+ struct vkd3d_shader_src_param **rel_addr)
{
static const struct
{
@@ -2446,12 +2526,8 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register *
{
if (reg->type == regs[i].type)
{
- if (reg->idx[0].rel_addr)
- {
- FIXME("Unhandled relative address.\n");
- return false;
- }
-
+ if (rel_addr)
+ *rel_addr = reg->idx[0].rel_addr;
*set = regs[i].set;
*index = reg->idx[0].offset;
return true;
@@ -2465,10 +2541,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par
const struct flat_constants_normaliser *normaliser)
{
enum vkd3d_shader_d3dbc_constant_register set;
+ struct vkd3d_shader_src_param *rel_addr;
uint32_t index;
size_t i, j;
- if (!get_flat_constant_register_type(&param->reg, &set, &index))
+ if (!get_flat_constant_register_type(&param->reg, &set, &index, &rel_addr))
return;
for (i = 0; i < normaliser->def_count; ++i)
@@ -2486,8 +2563,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par
param->reg.type = VKD3DSPR_CONSTBUFFER;
param->reg.idx[0].offset = set; /* register ID */
+ param->reg.idx[0].rel_addr = NULL;
param->reg.idx[1].offset = set; /* register index */
+ param->reg.idx[1].rel_addr = NULL;
param->reg.idx[2].offset = index; /* buffer index */
+ param->reg.idx[2].rel_addr = rel_addr;
param->reg.idx_count = 3;
}
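/* Effect on a relatively addressed constant (editorial sketch): a source
 * like c[a0.x + 7] now becomes a CONSTBUFFER reference whose idx[2] is 7
 * with idx[2].rel_addr pointing at a0.x, so the indirection applies to the
 * offset within the buffer rather than to the register ID, which the
 * validator still requires to be a constant. */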
@@ -2514,7 +2594,7 @@ static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_progr
def = &normaliser.defs[normaliser.def_count++];
- get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index);
+ get_flat_constant_register_type(&ins->dst[0].reg, &def->set, &def->index, NULL);
for (j = 0; j < 4; ++j)
def->value[j] = ins->src[0].reg.u.immconst_u32[j];
@@ -6037,6 +6117,7 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context)
{
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ const struct vkd3d_shader_location loc = ret->location;
static const struct vkd3d_shader_location no_loc;
size_t pos = ret - instructions->elements;
struct vkd3d_shader_instruction *ins;
@@ -6061,9 +6142,10 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
{
if (!shader_instruction_array_insert_at(&program->instructions, pos, 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ret = NULL;
ins = &program->instructions.elements[pos];
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1);
ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z;
src_param_init_const_uint(&ins->src[0], 0);
@@ -6073,20 +6155,20 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
if (!shader_instruction_array_insert_at(&program->instructions, pos, 3))
return VKD3D_ERROR_OUT_OF_MEMORY;
-
+ ret = NULL;
ins = &program->instructions.elements[pos];
switch (ref->data_type)
{
case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32:
- vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2);
+ vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].float_opcode, 1, 2);
src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp);
src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1],
VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT);
break;
case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32:
- vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2);
+ vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].uint_opcode, 1, 2);
src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp);
src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1],
VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT);
@@ -6107,14 +6189,14 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W);
++ins;
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1);
ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z;
src_param_init_ssa_bool(&ins->src[0], program->ssa_count);
++program->ssa_count;
++ins;
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
ins->dst[0].reg.idx[0].offset = colour_signature_idx;
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
@@ -6215,13 +6297,14 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos)
{
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ const struct vkd3d_shader_location loc = ret->location;
size_t pos = ret - instructions->elements;
struct vkd3d_shader_instruction *ins;
unsigned int output_idx = 0;
if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
-
+ ret = NULL;
ins = &program->instructions.elements[pos];
for (unsigned int i = 0; i < 8; ++i)
@@ -6229,7 +6312,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
if (!(mask & (1u << i)))
continue;
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DP4, 1, 2);
src_param_init_temp_float4(&ins->src[0], position_temp);
src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT);
ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
@@ -6247,7 +6330,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
++ins;
}
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
ins->dst[0].reg.idx[0].offset = position_signature_idx;
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
@@ -6404,15 +6487,16 @@ static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *progr
const struct vkd3d_shader_instruction *ret, size_t *ret_pos)
{
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ const struct vkd3d_shader_location loc = ret->location;
size_t pos = ret - instructions->elements;
struct vkd3d_shader_instruction *ins;
if (!shader_instruction_array_insert_at(&program->instructions, pos, 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
-
+ ret = NULL;
ins = &program->instructions.elements[pos];
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT);
@@ -6541,9 +6625,9 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra
if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ins = &program->instructions.elements[i + 1];
loc = &program->instructions.elements[i].location;
- ins = &program->instructions.elements[i + 1];
if (min_parameter)
{
@@ -6741,7 +6825,6 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr
{
if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2))
return VKD3D_ERROR_OUT_OF_MEMORY;
-
ins = &program->instructions.elements[insert_pos];
vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
@@ -6815,6 +6898,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
*/
if (!shader_instruction_array_insert_at(&program->instructions, pos, 4))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ret = NULL;
+
*ret_pos = pos + 4;
ssa_temp = program->ssa_count++;
@@ -6845,6 +6930,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
*/
if (!shader_instruction_array_insert_at(&program->instructions, pos, 4))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ret = NULL;
+
*ret_pos = pos + 4;
ssa_temp = program->ssa_count++;
@@ -6875,6 +6962,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
*/
if (!shader_instruction_array_insert_at(&program->instructions, pos, 5))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ret = NULL;
+
*ret_pos = pos + 5;
ssa_temp = program->ssa_count++;
@@ -7053,16 +7142,18 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr
{
const struct signature_element *e = &program->output_signature.elements[source_signature_idx];
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ const struct vkd3d_shader_location loc = ret->location;
size_t pos = ret - instructions->elements;
struct vkd3d_shader_instruction *ins;
if (!shader_instruction_array_insert_at(&program->instructions, pos, 2))
return VKD3D_ERROR_OUT_OF_MEMORY;
+ ret = NULL;
ins = &program->instructions.elements[pos];
/* Write the fog output. */
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1);
src_param_init_temp_float4(&ins->src[0], temp);
if (source == VKD3D_SHADER_FOG_SOURCE_Z)
@@ -7072,7 +7163,7 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr
++ins;
/* Write the position or specular output. */
- vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type),
source_signature_idx, e->mask);
src_param_init_temp_float4(&ins->src[0], temp);
@@ -7707,6 +7798,33 @@ static void vsir_validate_label_register(struct validation_context *ctx,
reg->idx[0].offset, ctx->program->block_count);
}
+static void vsir_validate_descriptor_indices(struct validation_context *ctx,
+ const struct vkd3d_shader_register *reg, enum vkd3d_shader_descriptor_type type, const char *name)
+{
+ const struct vkd3d_shader_descriptor_info1 *descriptor;
+
+ if (reg->idx[0].rel_addr)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Non-NULL indirect address for the ID of a register of type \"%s\".", name);
+
+ if (!ctx->program->has_descriptor_info)
+ return;
+
+ if (!(descriptor = vkd3d_shader_find_descriptor(&ctx->program->descriptors, type, reg->idx[0].offset)))
+ {
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "No matching descriptor found for register %s%u.", name, reg->idx[0].offset);
+ return;
+ }
+
+ if (!reg->idx[1].rel_addr && (reg->idx[1].offset < descriptor->register_index
+ || reg->idx[1].offset - descriptor->register_index >= descriptor->count))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+ "Register index %u doesn't belong to the range [%u, %u] for register %s%u.",
+ reg->idx[1].offset, descriptor->register_index,
+ descriptor->register_index + descriptor->count - 1, name, reg->idx[0].offset);
+}
+
static void vsir_validate_constbuffer_register(struct validation_context *ctx,
const struct vkd3d_shader_register *reg)
{
@@ -7725,9 +7843,7 @@ static void vsir_validate_constbuffer_register(struct validation_context *ctx,
return;
}
- if (reg->idx[0].rel_addr)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
- "Non-NULL relative address for a CONSTBUFFER register ID.");
+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, "cb");
}
static void vsir_validate_sampler_register(struct validation_context *ctx,
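
The containment check in the new helper is plain interval arithmetic: a register's second index must land in [register_index, register_index + count - 1] of the matching descriptor. A standalone restatement of the same check, with made-up values:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the range check in vsir_validate_descriptor_indices(); written with
 * a subtraction so it stays correct even when register_index + count would
 * overflow an unsigned int. */
static bool index_in_descriptor_range(unsigned int index,
        unsigned int register_index, unsigned int count)
{
    return index >= register_index && index - register_index < count;
}

int main(void)
{
    /* A hypothetical descriptor binding registers t3..t6
     * (register_index 3, count 4). */
    printf("%d\n", index_in_descriptor_range(5, 3, 4)); /* 1: inside the range */
    printf("%d\n", index_in_descriptor_range(7, 3, 4)); /* 0: one past the end */
    return 0;
}
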
@@ -7753,9 +7869,7 @@ static void vsir_validate_sampler_register(struct validation_context *ctx,
return;
}
- if (reg->idx[0].rel_addr)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
- "Non-NULL relative address for the descriptor index of a SAMPLER register.");
+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, "s");
}
static void vsir_validate_resource_register(struct validation_context *ctx,
@@ -7780,9 +7894,7 @@ static void vsir_validate_resource_register(struct validation_context *ctx,
return;
}
- if (reg->idx[0].rel_addr)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
- "Non-NULL relative address for the descriptor index of a RESOURCE register.");
+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, "t");
}
static void vsir_validate_uav_register(struct validation_context *ctx,
@@ -7812,9 +7924,7 @@ static void vsir_validate_uav_register(struct validation_context *ctx,
return;
}
- if (reg->idx[0].rel_addr)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
- "Non-NULL relative address for the descriptor index of a UAV register.");
+ vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, "u");
}
static void vsir_validate_ssa_register(struct validation_context *ctx,
@@ -9824,6 +9934,9 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t
if (program->shader_version.major <= 2)
vsir_transform(&ctx, vsir_program_ensure_diffuse);
+ if (program->shader_version.major < 4)
+ vsir_transform(&ctx, vsir_program_normalize_addr);
+
if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
vsir_transform(&ctx, vsir_program_remap_output_signature);
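
For reference, each pass registered here is a callback over the whole program. The sketch below shows the general shape only; the vsir_transformation_context parameter type and the instruction array's count field are assumptions inferred from the surrounding passes, and the body is illustrative, not the actual vsir_program_normalize_addr:

/* Illustrative shape of a vsir transform pass; only the registration pattern
 * is taken from the code above, the body is hypothetical. */
static enum vkd3d_result vsir_program_example_pass(struct vsir_program *program,
        struct vsir_transformation_context *ctx)
{
    size_t i;

    for (i = 0; i < program->instructions.count; ++i)
    {
        struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];

        /* Inspect or rewrite "ins" here. */
        (void)ins;
    }
    return VKD3D_OK;
}
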
diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
index 756b43298d3..a5d952cd525 100644
--- a/libs/vkd3d/libs/vkd3d-shader/msl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
@@ -1310,6 +1310,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
return ret;
VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
+ VKD3D_ASSERT(program->has_descriptor_info);
if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0)
return ret;
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index 0cc1ceca798..91a6686eb0d 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -11361,6 +11361,7 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags,
return ret;
VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
+ VKD3D_ASSERT(program->has_descriptor_info);
if (!(spirv_compiler = spirv_compiler_create(program, compile_info,
message_context, config_flags)))
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
index 2afeff086e5..9191429c439 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
@@ -1127,7 +1127,7 @@ static void vkd3d_shader_scan_combined_sampler_declaration(
&semantic->resource.range, semantic->resource_type, VKD3D_SHADER_RESOURCE_DATA_FLOAT);
}
-static const struct vkd3d_shader_descriptor_info1 *find_descriptor(
+const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor(
const struct vkd3d_shader_scan_descriptor_info1 *info,
enum vkd3d_shader_descriptor_type type, unsigned int register_id)
{
@@ -1181,11 +1181,11 @@ static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_co
if (dynamic_resource || dynamic_sampler)
return;
- if ((d = find_descriptor(context->scan_descriptor_info,
+ if ((d = vkd3d_shader_find_descriptor(context->scan_descriptor_info,
VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource->idx[0].offset)))
resource_space = d->register_space;
- if (sampler && (d = find_descriptor(context->scan_descriptor_info,
+ if (sampler && (d = vkd3d_shader_find_descriptor(context->scan_descriptor_info,
VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler->idx[0].offset)))
sampler_space = d->register_space;
}
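
Renaming find_descriptor() to vkd3d_shader_find_descriptor() makes the lookup callable outside this file (the IR validator above is the new user). A hedged usage sketch based only on the signature and fields visible in this patch; the wrapper function and the register id 2 are made up:

/* Resolve the register space of SRV t2 from previously scanned descriptor
 * info; returns 0 (the default space) when no descriptor matches. */
static unsigned int get_srv_register_space(
        const struct vkd3d_shader_scan_descriptor_info1 *descriptors)
{
    const struct vkd3d_shader_descriptor_info1 *d;

    if ((d = vkd3d_shader_find_descriptor(descriptors,
            VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, 2)))
        return d->register_space;
    return 0;
}
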
@@ -1606,6 +1606,9 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info,
add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context);
+ if (add_descriptor_info)
+ program->has_descriptor_info = true;
+
if (TRACE_ON())
vsir_program_trace(program);
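
Setting has_descriptor_info here is what the new VKD3D_ASSERT checks in msl.c and spirv.c rely on: descriptor scanning must have run before a backend consumes the program. The same gating pattern in isolation, with hypothetical names:

#include <assert.h>
#include <stdbool.h>

struct scanned_program
{
    bool has_descriptor_info;
    /* ... descriptor tables ... */
};

/* The scan step records that descriptor info was collected. */
static void scan(struct scanned_program *p)
{
    /* ... populate the descriptor tables ... */
    p->has_descriptor_info = true;
}

/* A backend refuses to run on a program that was never scanned. */
static void compile(struct scanned_program *p)
{
    assert(p->has_descriptor_info);
    /* ... emit code using the descriptor tables ... */
}
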
@@ -2046,6 +2049,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
VKD3D_SHADER_TARGET_SPIRV_BINARY,
#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
VKD3D_SHADER_TARGET_SPIRV_TEXT,
+#endif
+#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL
+ VKD3D_SHADER_TARGET_GLSL,
#endif
VKD3D_SHADER_TARGET_D3D_ASM,
VKD3D_SHADER_TARGET_D3D_BYTECODE,
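
This table is what vkd3d_shader_get_supported_target_types() hands back, so whether GLSL appears is decided at build time by VKD3D_SHADER_UNSUPPORTED_GLSL. A small probe, assuming the public signature with a count out-parameter:

#include <stdio.h>
#include <vkd3d_shader.h>

/* Lists the target types available for DXBC/TPF input in this build. */
int main(void)
{
    const enum vkd3d_shader_target_type *types;
    unsigned int count, i;

    types = vkd3d_shader_get_supported_target_types(VKD3D_SHADER_SOURCE_DXBC_TPF, &count);
    for (i = 0; i < count; ++i)
        printf("supported target type: %#x\n", types[i]);
    return 0;
}
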
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index e794257b9d8..bf794d5e936 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -1444,6 +1444,9 @@ struct vkd3d_shader_scan_descriptor_info1
unsigned int descriptor_count;
};
+const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor(
+ const struct vkd3d_shader_scan_descriptor_info1 *info,
+ enum vkd3d_shader_descriptor_type type, unsigned int register_id);
void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info);
struct vsir_program
@@ -1456,6 +1459,7 @@ struct vsir_program
struct shader_signature patch_constant_signature;
struct vkd3d_shader_scan_descriptor_info1 descriptors;
+ bool has_descriptor_info;
unsigned int parameter_count;
const struct vkd3d_shader_parameter1 *parameters;
--
2.47.2