diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch index 82a46db3..4c56b616 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-81dc67b1ef411ea3cd7c09e6e935765ac15.patch @@ -1,4 +1,4 @@ -From 54ef4a50bd86dab71334c948bc607695c22c0662 Mon Sep 17 00:00:00 2001 +From a616817217f5d5276a38ec1aac6ffc26506a0740 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 21 Feb 2025 09:15:01 +1100 Subject: [PATCH] Updated vkd3d to 81dc67b1ef411ea3cd7c09e6e935765ac1583d28. diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch index 018a5d5c..aa2e96bf 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-a4f58be00c58e06b5bd60bec7eb9e37b6f1.patch @@ -1,4 +1,4 @@ -From 55c8b8c0229af94159f34ac5089fec5ff0ad98de Mon Sep 17 00:00:00 2001 +From 83a3253798716f86bf8ace1e2cf0fbbd829e614a Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 14 Mar 2025 08:23:50 +1100 Subject: [PATCH] Updated vkd3d to a4f58be00c58e06b5bd60bec7eb9e37b6f112c24. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch index 7aa56311..f60d27b6 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-e418bbcfac74230dfbf0c49b72c4f059cb1.patch @@ -1,4 +1,4 @@ -From b779a594a10dc6fdda19a1445305333982f89d74 Mon Sep 17 00:00:00 2001 +From 6d98fd430eead31dfd3e0c71e883d5225b0e1876 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 18 Mar 2025 08:53:16 +1100 Subject: [PATCH] Updated vkd3d to e418bbcfac74230dfbf0c49b72c4f059cb1afc00. diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch index 1eb47f32..1d0dd619 100644 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-110edf32d0b2a2f0a49cdd76c977b9eedd0.patch @@ -1,4 +1,4 @@ -From 00ea2e03290596906468365901a49fac111ff76a Mon Sep 17 00:00:00 2001 +From 19e486ddd73608536af85ffd79aafb2b99a52bc2 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 20 Mar 2025 06:34:08 +1100 Subject: [PATCH] Updated vkd3d to 110edf32d0b2a2f0a49cdd76c977b9eedd06628e. diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch new file mode 100644 index 00000000..4dfedd70 --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-f576ecc9929dd98c900bb8bc0335b91a1a0.patch @@ -0,0 +1,1779 @@ +From 86412218c44000e79015064e3efe32a3783dab6b Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 4 Apr 2025 07:59:53 +1100 +Subject: [PATCH] Updated vkd3d to f576ecc9929dd98c900bb8bc0335b91a1a0d3bff. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 32 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 4 +- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 3 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 38 + + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 + + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 738 ++++++++++++++++-- + libs/vkd3d/libs/vkd3d-shader/ir.c | 187 ++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 1 + + libs/vkd3d/libs/vkd3d-shader/spirv.c | 1 + + .../libs/vkd3d-shader/vkd3d_shader_main.c | 12 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 4 + + 11 files changed, 911 insertions(+), 112 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 58e35cf22e8..b49ef9865db 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1759,27 +1759,40 @@ static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) + + static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) + { ++ uint32_t offset = reg->reg.idx_count ? reg->reg.idx[0].offset : 0; ++ + VKD3D_ASSERT(reg->write_mask); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(®->reg) + | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) + | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) +- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); ++ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); + } + + static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) + { ++ uint32_t address_mode = VKD3D_SM1_ADDRESS_MODE_ABSOLUTE, offset = 0; ++ ++ if (reg->reg.idx_count) ++ { ++ offset = reg->reg.idx[0].offset; ++ if (reg->reg.idx[0].rel_addr) ++ address_mode = VKD3D_SM1_ADDRESS_MODE_RELATIVE; ++ } ++ + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER + | sm1_encode_register_type(®->reg) ++ | (address_mode << VKD3D_SM1_ADDRESS_MODE_SHIFT) + | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) + | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) +- | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); ++ | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); + } + + static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) + { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; ++ const struct vkd3d_shader_src_param *src; + const struct vkd3d_sm1_opcode_info *info; + unsigned int i; + uint32_t token; +@@ -1810,13 +1823,10 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v + + for (i = 0; i < ins->src_count; ++i) + { +- if (ins->src[i].reg.idx[0].rel_addr) +- { +- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, +- "Unhandled relative addressing on source register."); +- d3dbc->failed = true; +- } +- write_sm1_src_register(buffer, &ins->src[i]); ++ src = &ins->src[i]; ++ write_sm1_src_register(buffer, src); ++ if (src->reg.idx_count && src->reg.idx[0].rel_addr) ++ write_sm1_src_register(buffer, src->reg.idx[0].rel_addr); + } + }; + +@@ -1831,6 +1841,7 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 + .reg.type = VKD3DSPR_CONST, + .write_mask = VKD3DSP_WRITEMASK_ALL, + .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, ++ .reg.idx_count = 1, + }; + + token = VKD3D_SM1_OP_DEF; +@@ -1863,6 +1874,7 @@ static void 
d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, + reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.write_mask = VKD3DSP_WRITEMASK_ALL; + reg.reg.idx[0].offset = reg_id; ++ reg.reg.idx_count = 1; + + write_sm1_dst_register(buffer, ®); + } +@@ -1938,6 +1950,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str + case VKD3DSIH_MAX: + case VKD3DSIH_MIN: + case VKD3DSIH_MOV: ++ case VKD3DSIH_MOVA: + case VKD3DSIH_MUL: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SLT: +@@ -1982,6 +1995,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + uint32_t token, usage_idx; + bool ret; + ++ reg.reg.idx_count = 1; + if (sm1_register_from_semantic_name(version, element->semantic_name, + element->semantic_index, output, ®.reg.type, ®.reg.idx[0].offset)) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 7a226c1c870..debcb261811 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -2366,6 +2366,7 @@ static inline bool is_object_fx_type(enum state_property_component_type type) + case FX_BLEND: + case FX_VERTEXSHADER: + case FX_PIXELSHADER: ++ case FX_GEOMETRYSHADER: + return true; + default: + return false; +@@ -2761,7 +2762,8 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + struct hlsl_ir_constant *c = hlsl_ir_constant(node); + struct hlsl_type *data_type = c->node.data_type; + +- if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) ++ if (data_type->class == HLSL_CLASS_SCALAR ++ && (data_type->e.numeric.type == HLSL_TYPE_INT || data_type->e.numeric.type == HLSL_TYPE_UINT)) + { + if (c->value.u[0].u != 0) + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 828a94d77ab..a87ade5e467 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -1296,7 +1296,7 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled SV_POSITION index %u.", idx); + if (version->type == VKD3D_SHADER_TYPE_PIXEL) +- vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); ++ vkd3d_string_buffer_printf(buffer, "vec4(gl_FragCoord.xyz, 1.0 / gl_FragCoord.w)"); + else + vkd3d_string_buffer_printf(buffer, "gl_Position"); + break; +@@ -2468,6 +2468,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + return ret; + + VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); ++ VKD3D_ASSERT(program->has_descriptor_info); + + vkd3d_glsl_generator_init(&generator, program, compile_info, + combined_sampler_info, message_context); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 01586592b25..d1d20b7384c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -1588,6 +1588,43 @@ void hlsl_block_add_simple_store(struct hlsl_ctx *ctx, struct hlsl_block *block, + hlsl_block_add_store_index(ctx, block, &lhs_deref, NULL, rhs, 0, &rhs->loc); + } + ++static struct hlsl_ir_node *hlsl_new_store_parent(struct hlsl_ctx *ctx, ++ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, ++ unsigned int writemask, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_store *store; ++ ++ 
VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); ++ VKD3D_ASSERT(lhs->path_len >= path_len); ++ ++ if (!(store = hlsl_alloc(ctx, sizeof(*store)))) ++ return NULL; ++ init_node(&store->node, HLSL_IR_STORE, NULL, loc); ++ ++ if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, path_len)) ++ { ++ vkd3d_free(store); ++ return NULL; ++ } ++ for (unsigned int i = 0; i < path_len; ++i) ++ hlsl_src_from_node(&store->lhs.path[i], lhs->path[i].node); ++ ++ hlsl_src_from_node(&store->rhs, rhs); ++ ++ if (!writemask && type_is_single_reg(rhs->data_type)) ++ writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; ++ store->writemask = writemask; ++ ++ return &store->node; ++} ++ ++void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, ++ unsigned int writemask, const struct vkd3d_shader_location *loc) ++{ ++ append_new_instr(ctx, block, hlsl_new_store_parent(ctx, lhs, path_len, rhs, writemask, loc)); ++} ++ + void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) + { +@@ -4957,6 +4994,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d + + if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY + || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT ++ || target_type == VKD3D_SHADER_TARGET_GLSL + || target_type == VKD3D_SHADER_TARGET_D3D_ASM) + { + uint64_t config_flags = vkd3d_shader_init_config_flags(); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 1d78c5622de..fafa5740963 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -1565,6 +1565,9 @@ void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *blo + void hlsl_block_add_store_index(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, + unsigned int writemask, const struct vkd3d_shader_location *loc); ++void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, ++ unsigned int writemask, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, + unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 8fcf6e6ac54..ba56ba90403 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -1916,12 +1916,6 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, + if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count)) + return false; + +- if (hlsl_version_lt(ctx, 4, 0)) +- { +- TRACE("Non-constant index propagation is not yet supported for SM1.\n"); +- return false; +- } +- + VKD3D_ASSERT(count); + + hlsl_block_init(&block); +@@ -1950,6 +1944,12 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, + else if (x != idx->src.var) + goto done; + ++ if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type != VKD3D_SHADER_TYPE_VERTEX) ++ { ++ TRACE("Skipping propagating non-constant deref to 
SM1 uniform %s.\n", var->name); ++ goto done; ++ } ++ + if (i == 0) + { + path_len = idx->src.path_len; +@@ -2184,6 +2184,9 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, + return false; + VKD3D_ASSERT(value->component == 0); + ++ /* A uniform object should have never been written to. */ ++ VKD3D_ASSERT(!deref->var->is_uniform); ++ + /* Only HLSL_IR_LOAD can produce an object. */ + load = hlsl_ir_load(value->node); + +@@ -2488,6 +2491,554 @@ enum validation_result + DEREF_VALIDATION_NOT_CONSTANT, + }; + ++struct vectorize_exprs_state ++{ ++ struct vectorizable_exprs_group ++ { ++ struct hlsl_block *block; ++ struct hlsl_ir_expr *exprs[4]; ++ uint8_t expr_count, component_count; ++ } *groups; ++ size_t count, capacity; ++}; ++ ++static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b) ++{ ++ /* TODO: We can also vectorize different constants. */ ++ ++ if (a->type == HLSL_IR_SWIZZLE) ++ a = hlsl_ir_swizzle(a)->val.node; ++ if (b->type == HLSL_IR_SWIZZLE) ++ b = hlsl_ir_swizzle(b)->val.node; ++ ++ return a == b; ++} ++ ++static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b) ++{ ++ if (a->op != b->op) ++ return false; ++ ++ for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j) ++ { ++ if (!a->operands[j].node) ++ break; ++ if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node)) ++ return false; ++ } ++ ++ return true; ++} ++ ++static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state) ++{ ++ if (expr->node.data_type->class > HLSL_CLASS_VECTOR) ++ return; ++ ++ /* These are the only current ops that are not per-component. */ ++ if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED ++ || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD) ++ return; ++ ++ for (size_t i = 0; i < state->count; ++i) ++ { ++ struct vectorizable_exprs_group *group = &state->groups[i]; ++ struct hlsl_ir_expr *other = group->exprs[0]; ++ ++ /* These are SSA instructions, which means they have the same value ++ * regardless of what block they're in. However, being in different ++ * blocks may mean that one expression or the other is not always ++ * executed. 
*/ ++ ++ if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4 ++ && group->block == block ++ && is_same_vectorizable_expr(expr, other)) ++ { ++ group->exprs[group->expr_count++] = expr; ++ group->component_count += expr->node.data_type->e.numeric.dimx; ++ return; ++ } ++ } ++ ++ if (!hlsl_array_reserve(ctx, (void **)&state->groups, ++ &state->capacity, state->count + 1, sizeof(*state->groups))) ++ return; ++ state->groups[state->count].block = block; ++ state->groups[state->count].exprs[0] = expr; ++ state->groups[state->count].expr_count = 1; ++ state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx; ++ ++state->count; ++} ++ ++static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct vectorize_exprs_state *state) ++{ ++ struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->type == HLSL_IR_EXPR) ++ { ++ record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state); ++ } ++ else if (instr->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ find_vectorizable_expr_groups(ctx, &iff->then_block, state); ++ find_vectorizable_expr_groups(ctx, &iff->else_block, state); ++ } ++ else if (instr->type == HLSL_IR_LOOP) ++ { ++ find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state); ++ } ++ else if (instr->type == HLSL_IR_SWITCH) ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ find_vectorizable_expr_groups(ctx, &c->body, state); ++ } ++ } ++} ++ ++/* Combine sequences like ++ * ++ * 3: @1.x ++ * 4: @2.x ++ * 5: @3 * @4 ++ * 6: @1.y ++ * 7: @2.x ++ * 8: @6 * @7 ++ * ++ * into ++ * ++ * 5_1: @1.xy ++ * 5_2: @2.xx ++ * 5_3: @5_1 * @5_2 ++ * 5: @5_3.x ++ * 8: @5_3.y ++ * ++ * Each operand to an expression needs to refer to the same ultimate source ++ * (in this case @1 and @2 respectively), but can be a swizzle thereof. ++ * ++ * In practice the swizzles @5 and @8 can generally then be vectorized again, ++ * either as part of another expression, or as part of a store. ++ */ ++static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ struct vectorize_exprs_state state = {0}; ++ bool progress = false; ++ ++ find_vectorizable_expr_groups(ctx, block, &state); ++ ++ for (unsigned int i = 0; i < state.count; ++i) ++ { ++ struct vectorizable_exprs_group *group = &state.groups[i]; ++ struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; ++ uint32_t swizzles[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *arg, *combined; ++ unsigned int component_count = 0; ++ struct hlsl_type *combined_type; ++ struct hlsl_block new_block; ++ struct hlsl_ir_expr *expr; ++ ++ if (group->expr_count == 1) ++ continue; ++ ++ hlsl_block_init(&new_block); ++ ++ for (unsigned int j = 0; j < group->expr_count; ++j) ++ { ++ expr = group->exprs[j]; ++ ++ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) ++ { ++ uint32_t arg_swizzle; ++ ++ if (!(arg = expr->operands[a].node)) ++ break; ++ ++ if (arg->type == HLSL_IR_SWIZZLE) ++ arg_swizzle = hlsl_ir_swizzle(arg)->u.vector; ++ else ++ arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W); ++ ++ /* Mask out the invalid components. 
*/ ++ arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1; ++ swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count); ++ } ++ ++ component_count += expr->node.data_type->e.numeric.dimx; ++ } ++ ++ expr = group->exprs[0]; ++ for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) ++ { ++ if (!(arg = expr->operands[a].node)) ++ break; ++ if (arg->type == HLSL_IR_SWIZZLE) ++ arg = hlsl_ir_swizzle(arg)->val.node; ++ args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc); ++ } ++ ++ combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count); ++ combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc); ++ ++ list_move_before(&expr->node.entry, &new_block.instrs); ++ ++ TRACE("Combining %u %s instructions into %p.\n", group->expr_count, ++ debug_hlsl_expr_op(group->exprs[0]->op), combined); ++ ++ component_count = 0; ++ for (unsigned int j = 0; j < group->expr_count; ++j) ++ { ++ struct hlsl_ir_node *replacement; ++ ++ expr = group->exprs[j]; ++ ++ if (!(replacement = hlsl_new_swizzle(ctx, ++ HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count), ++ expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc))) ++ goto out; ++ component_count += expr->node.data_type->e.numeric.dimx; ++ list_add_before(&expr->node.entry, &replacement->entry); ++ hlsl_replace_node(&expr->node, replacement); ++ } ++ ++ progress = true; ++ } ++ ++out: ++ vkd3d_free(state.groups); ++ return progress; ++} ++ ++struct vectorize_stores_state ++{ ++ struct vectorizable_stores_group ++ { ++ struct hlsl_block *block; ++ /* We handle overlapping stores, because it's not really easier not to. ++ * In theory, then, we could collect an arbitrary number of stores here. ++ * ++ * In practice, overlapping stores are unlikely, and of course at most ++ * 4 stores can appear without overlap. Therefore, for simplicity, we ++ * just use a fixed array of 4. ++ * ++ * Since computing the writemask requires traversing the deref, and we ++ * need to do that anyway, we store it here for convenience. */ ++ struct hlsl_ir_store *stores[4]; ++ unsigned int path_len; ++ uint8_t writemasks[4]; ++ uint8_t store_count; ++ bool dirty; ++ } *groups; ++ size_t count, capacity; ++}; ++ ++/* This must be a store to a subsection of a vector. ++ * In theory we can also vectorize stores to packed struct fields, ++ * but this requires target-specific knowledge and is probably best left ++ * to a VSIR pass. */ ++static bool can_vectorize_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, ++ unsigned int *path_len, uint8_t *writemask) ++{ ++ struct hlsl_type *type = store->lhs.var->data_type; ++ unsigned int i; ++ ++ if (store->rhs.node->data_type->class > HLSL_CLASS_VECTOR) ++ return false; ++ ++ if (type->class == HLSL_CLASS_SCALAR) ++ return false; ++ ++ for (i = 0; type->class != HLSL_CLASS_VECTOR && i < store->lhs.path_len; ++i) ++ type = hlsl_get_element_type_from_path_index(ctx, type, store->lhs.path[i].node); ++ ++ if (type->class != HLSL_CLASS_VECTOR) ++ return false; ++ ++ *path_len = i; ++ ++ if (i < store->lhs.path_len) ++ { ++ struct hlsl_ir_constant *c; ++ ++ /* This is a store to a scalar component of a vector, achieved via ++ * indexing. 
*/ ++ ++ if (store->lhs.path[i].node->type != HLSL_IR_CONSTANT) ++ return false; ++ c = hlsl_ir_constant(store->lhs.path[i].node); ++ *writemask = (1u << c->value.u[0].u); ++ } ++ else ++ { ++ *writemask = store->writemask; ++ } ++ ++ return true; ++} ++ ++static bool derefs_are_same_vector(struct hlsl_ctx *ctx, const struct hlsl_deref *a, const struct hlsl_deref *b) ++{ ++ struct hlsl_type *type = a->var->data_type; ++ ++ if (a->var != b->var) ++ return false; ++ ++ for (unsigned int i = 0; type->class != HLSL_CLASS_VECTOR && i < a->path_len && i < b->path_len; ++i) ++ { ++ if (a->path[i].node != b->path[i].node) ++ return false; ++ type = hlsl_get_element_type_from_path_index(ctx, type, a->path[i].node); ++ } ++ ++ return true; ++} ++ ++static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_store *store, struct vectorize_stores_state *state) ++{ ++ unsigned int path_len; ++ uint8_t writemask; ++ ++ if (!can_vectorize_store(ctx, store, &path_len, &writemask)) ++ { ++ /* In the case of a dynamically indexed vector, we must invalidate ++ * any groups that statically index the same vector. ++ * For the sake of expediency, we go one step further and invalidate ++ * any groups that store to the same variable. ++ * (We also don't check that that was the reason why this store isn't ++ * vectorizable.) ++ * We could be more granular, but we'll defer that until it comes ++ * up in practice. */ ++ for (size_t i = 0; i < state->count; ++i) ++ { ++ if (state->groups[i].stores[0]->lhs.var == store->lhs.var) ++ state->groups[i].dirty = true; ++ } ++ return; ++ } ++ ++ for (size_t i = 0; i < state->count; ++i) ++ { ++ struct vectorizable_stores_group *group = &state->groups[i]; ++ struct hlsl_ir_store *other = group->stores[0]; ++ ++ if (group->dirty) ++ continue; ++ ++ if (derefs_are_same_vector(ctx, &store->lhs, &other->lhs)) ++ { ++ /* Stores must be in the same CFG block. If they're not, ++ * they're not executed in exactly the same flow, and ++ * therefore can't be vectorized. */ ++ if (group->block == block ++ && is_same_vectorizable_source(store->rhs.node, other->rhs.node)) ++ { ++ if (group->store_count < ARRAY_SIZE(group->stores)) ++ { ++ group->stores[group->store_count] = store; ++ group->writemasks[group->store_count] = writemask; ++ ++group->store_count; ++ return; ++ } ++ } ++ else ++ { ++ /* A store to the same vector with a different source, or in ++ * a different CFG block, invalidates any earlier store. ++ * ++ * A store to a component which *contains* the vector in ++ * question would also invalidate, but we should have split all ++ * of those by the time we get here. */ ++ group->dirty = true; ++ ++ /* Note that we do exit this loop early if we find a store A we ++ * can vectorize with, but that's fine. If there was a store B ++ * also in the state that we can't vectorize with, it would ++ * already have invalidated A. */ ++ } ++ } ++ else ++ { ++ /* This could still be a store to the same vector, if e.g. the ++ * vector is part of a dynamically indexed array, or the path has ++ * two equivalent instructions which refer to the same component. ++ * [CSE may help with the latter, but we don't have it yet, ++ * and we shouldn't depend on it anyway.] ++ * For the sake of expediency, we just invalidate it if it refers ++ * to the same variable at all. ++ * As above, we could be more granular, but we'll defer that until ++ * it comes up in practice. 
*/ ++ if (store->lhs.var == other->lhs.var) ++ group->dirty = true; ++ ++ /* As above, we don't need to worry about exiting the loop early. */ ++ } ++ } ++ ++ if (!hlsl_array_reserve(ctx, (void **)&state->groups, ++ &state->capacity, state->count + 1, sizeof(*state->groups))) ++ return; ++ state->groups[state->count].block = block; ++ state->groups[state->count].stores[0] = store; ++ state->groups[state->count].path_len = path_len; ++ state->groups[state->count].writemasks[0] = writemask; ++ state->groups[state->count].store_count = 1; ++ state->groups[state->count].dirty = false; ++ ++state->count; ++} ++ ++static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct vectorize_stores_state *state) ++{ ++ struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->type == HLSL_IR_STORE) ++ { ++ record_vectorizable_store(ctx, block, hlsl_ir_store(instr), state); ++ } ++ else if (instr->type == HLSL_IR_LOAD) ++ { ++ struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var; ++ ++ /* By vectorizing store A with store B, we are effectively moving ++ * store A down to happen at the same time as store B. ++ * If there was a load of the same variable between the two, this ++ * would be incorrect. ++ * Therefore invalidate all stores to this variable. As above, we ++ * could be more granular if necessary. */ ++ ++ for (unsigned int i = 0; i < state->count; ++i) ++ { ++ if (state->groups[i].stores[0]->lhs.var == var) ++ state->groups[i].dirty = true; ++ } ++ } ++ else if (instr->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ find_vectorizable_store_groups(ctx, &iff->then_block, state); ++ find_vectorizable_store_groups(ctx, &iff->else_block, state); ++ } ++ else if (instr->type == HLSL_IR_LOOP) ++ { ++ find_vectorizable_store_groups(ctx, &hlsl_ir_loop(instr)->body, state); ++ } ++ else if (instr->type == HLSL_IR_SWITCH) ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ find_vectorizable_store_groups(ctx, &c->body, state); ++ } ++ } ++} ++ ++/* Combine sequences like ++ * ++ * 2: @1.yw ++ * 3: @1.zy ++ * 4: var.xy = @2 ++ * 5: var.yw = @3 ++ * ++ * to ++ * ++ * 2: @1.yzy ++ * 5: var.xyw = @2 ++ * ++ * There are a lot of gotchas here. We need to make sure the two stores are to ++ * the same vector (which may be embedded in a complex variable), that they're ++ * always executed in the same control flow, and that there aren't any other ++ * stores or loads on the same vector in the middle. */ ++static bool vectorize_stores(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ struct vectorize_stores_state state = {0}; ++ bool progress = false; ++ ++ find_vectorizable_store_groups(ctx, block, &state); ++ ++ for (unsigned int i = 0; i < state.count; ++i) ++ { ++ struct vectorizable_stores_group *group = &state.groups[i]; ++ uint32_t new_swizzle = 0, new_writemask = 0; ++ struct hlsl_ir_node *new_rhs, *value; ++ uint32_t swizzle_components[4]; ++ unsigned int component_count; ++ struct hlsl_ir_store *store; ++ struct hlsl_block new_block; ++ ++ if (group->store_count == 1) ++ continue; ++ ++ hlsl_block_init(&new_block); ++ ++ /* Compute the swizzle components. 
*/ ++ for (unsigned int j = 0; j < group->store_count; ++j) ++ { ++ unsigned int writemask = group->writemasks[j]; ++ uint32_t rhs_swizzle; ++ ++ store = group->stores[j]; ++ ++ if (store->rhs.node->type == HLSL_IR_SWIZZLE) ++ rhs_swizzle = hlsl_ir_swizzle(store->rhs.node)->u.vector; ++ else ++ rhs_swizzle = HLSL_SWIZZLE(X, Y, Z, W); ++ ++ component_count = 0; ++ for (unsigned int k = 0; k < 4; ++k) ++ { ++ if (writemask & (1u << k)) ++ swizzle_components[k] = hlsl_swizzle_get_component(rhs_swizzle, component_count++); ++ } ++ ++ new_writemask |= writemask; ++ } ++ ++ /* Construct the new swizzle. */ ++ component_count = 0; ++ for (unsigned int k = 0; k < 4; ++k) ++ { ++ if (new_writemask & (1u << k)) ++ hlsl_swizzle_set_component(&new_swizzle, component_count++, swizzle_components[k]); ++ } ++ ++ store = group->stores[0]; ++ value = store->rhs.node; ++ if (value->type == HLSL_IR_SWIZZLE) ++ value = hlsl_ir_swizzle(value)->val.node; ++ ++ new_rhs = hlsl_block_add_swizzle(ctx, &new_block, new_swizzle, component_count, value, &value->loc); ++ hlsl_block_add_store_parent(ctx, &new_block, &store->lhs, ++ group->path_len, new_rhs, new_writemask, &store->node.loc); ++ ++ TRACE("Combining %u stores to %s.\n", group->store_count, store->lhs.var->name); ++ ++ list_move_before(&group->stores[group->store_count - 1]->node.entry, &new_block.instrs); ++ ++ for (unsigned int j = 0; j < group->store_count; ++j) ++ { ++ list_remove(&group->stores[j]->node.entry); ++ hlsl_free_instr(&group->stores[j]->node); ++ } ++ ++ progress = true; ++ } ++ ++ vkd3d_free(state.groups); ++ return progress; ++} ++ + static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, + const struct hlsl_deref *deref) + { +@@ -3123,6 +3674,11 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc + return false; + } + ++static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) ++{ ++ return ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && deref->var->is_uniform; ++} ++ + /* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant + * index into multiple constant loads, where the value of only one of them ends up in the resulting + * node. 
+@@ -3149,6 +3705,9 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n + if (deref->path_len == 0) + return false; + ++ if (deref_supports_sm1_indirect_addressing(ctx, deref)) ++ return false; ++ + for (i = deref->path_len - 1; ; --i) + { + if (deref->path[i].node->type != HLSL_IR_CONSTANT) +@@ -7839,7 +8398,8 @@ static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, st + + if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) + return false; +- src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ if (src_param->reg.dimension != VSIR_DIMENSION_NONE) ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + return true; + } + +@@ -7869,7 +8429,6 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) + { + struct hlsl_ir_node *instr = &constant->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + +@@ -7881,13 +8440,11 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->reg.idx[0].offset = constant->reg.id; + src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + } + + static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, +@@ -7974,11 +8531,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = instr->reg.id; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + dst_param->write_mask = 1u << i; + + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + c = vsir_swizzle_get_component(src_swizzle, i); + src_param->swizzle = vsir_swizzle_from_writemask(1u << c); + } +@@ -7990,7 +8549,6 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi + { + struct hlsl_ir_node *operand = expr->operands[0].node; + struct hlsl_ir_node *instr = &expr->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + unsigned int src_count = 0; +@@ -8001,25 +8559,20 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; +- +- src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = operand->reg.id; +- src_param->swizzle = 
generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, VKD3DSP_WRITEMASK_ALL); + + if (ctx->profile->major_version < 3) + { + src_param = &ins->src[1]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + + src_param = &ins->src[2]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; + src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; + } +@@ -8341,19 +8894,68 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, + else + VKD3D_ASSERT(reg.allocated); + +- vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ if (type == VKD3DSPR_DEPTHOUT) ++ { ++ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); ++ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; ++ } ++ else ++ { ++ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ dst_param->reg.idx[0].offset = register_index; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ } + dst_param->write_mask = writemask; +- dst_param->reg.idx[0].offset = register_index; + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); + } + ++static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_node *instr) ++{ ++ enum vkd3d_shader_opcode opcode = hlsl_version_ge(ctx, 2, 0) ? VKD3DSIH_MOVA : VKD3DSIH_MOV; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(instr->reg.allocated); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) ++ return; ++ ++ dst_param = &ins->dst[0]; ++ vsir_register_init(&dst_param->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); ++ dst_param->write_mask = VKD3DSP_WRITEMASK_0; ++ ++ VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(instr->data_type->e.numeric.dimx == 1); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, instr, VKD3DSP_WRITEMASK_ALL); ++} ++ ++static struct vkd3d_shader_src_param *sm1_generate_vsir_new_address_src(struct hlsl_ctx *ctx, ++ struct vsir_program *program) ++{ ++ struct vkd3d_shader_src_param *idx_src; ++ ++ if (!(idx_src = vsir_program_get_src_params(program, 1))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return NULL; ++ } ++ ++ memset(idx_src, 0, sizeof(*idx_src)); ++ vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); ++ idx_src->reg.dimension = VSIR_DIMENSION_VEC4; ++ idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ return idx_src; ++} ++ + static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, +- struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, +- unsigned int dst_writemask, const struct vkd3d_shader_location *loc) ++ struct vsir_program *program, struct vkd3d_shader_src_param *src_param, ++ struct hlsl_deref *deref, uint32_t dst_writemask, const struct vkd3d_shader_location *loc) + { + enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; ++ struct vkd3d_shader_src_param *src_rel_addr = NULL; + struct vkd3d_shader_version version; + uint32_t register_index; + unsigned int writemask; +@@ -8371,12 
+8973,26 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + } + else if (deref->var->is_uniform) + { ++ unsigned int offset = deref->const_offset; ++ + type = VKD3DSPR_CONST; ++ register_index = deref->var->regs[HLSL_REGSET_NUMERIC].id + offset / 4; + +- reg = hlsl_reg_from_deref(ctx, deref); +- register_index = reg.id; +- writemask = reg.writemask; +- VKD3D_ASSERT(reg.allocated); ++ writemask = 0xf & (0xf << (offset % 4)); ++ if (deref->var->regs[HLSL_REGSET_NUMERIC].writemask) ++ writemask = hlsl_combine_writemasks(deref->var->regs[HLSL_REGSET_NUMERIC].writemask, writemask); ++ ++ if (deref->rel_offset.node) ++ { ++ VKD3D_ASSERT(deref_supports_sm1_indirect_addressing(ctx, deref)); ++ ++ if (!(src_rel_addr = sm1_generate_vsir_new_address_src(ctx, program))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ } ++ VKD3D_ASSERT(deref->var->regs[HLSL_REGSET_NUMERIC].allocated); + } + else if (deref->var->is_input_semantic) + { +@@ -8408,32 +9024,30 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + } + + vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->reg.idx[0].offset = register_index; ++ src_param->reg.idx[0].rel_addr = src_rel_addr; + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); +- +- if (deref->rel_offset.node) +- hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); + } + + static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, + struct hlsl_ir_load *load) + { + struct hlsl_ir_node *instr = &load->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + ++ if (load->src.rel_offset.node) ++ sm1_generate_vsir_instr_mova(ctx, program, load->src.rel_offset.node); ++ + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + +- sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask, +- &ins->location); ++ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0], ++ &load->src, ins->dst[0].write_mask, &ins->location); + } + + static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, +@@ -8443,7 +9057,6 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; + struct hlsl_ir_node *instr = &load->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + enum vkd3d_shader_opcode opcode; +@@ -8482,15 +9095,12 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + return; + ins->flags = flags; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + src_param = &ins->src[0]; + vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); + +- 
sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, ++ sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], &load->resource, + VKD3DSP_WRITEMASK_ALL, &ins->location); + + if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) +@@ -8507,7 +9117,6 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) + { + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; +@@ -8517,11 +9126,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); +- dst_param->reg.idx[0].offset = instr->reg.id; +- dst_param->reg.dimension = VSIR_DIMENSION_VEC4; +- dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); + swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); +@@ -8557,7 +9162,6 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *condition = jump->condition.node; + struct hlsl_ir_node *instr = &jump->node; +- struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) +@@ -8565,10 +9169,7 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) + return; + +- dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- dst_param->reg.idx[0].offset = condition->reg.id; +- dst_param->write_mask = condition->reg.writemask; ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, condition); + } + else + { +@@ -8689,6 +9290,10 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + return; + } + ++ program->temp_count = allocate_temp_registers(ctx, entry_func); ++ if (ctx->result) ++ return; ++ + generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); +@@ -12532,6 +13137,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, + struct recursive_call_ctx recursive_call_ctx; + struct hlsl_ir_var *var; + unsigned int i; ++ bool progress; + + ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func; + +@@ -12709,6 +13315,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, + hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); + } + ++ compute_liveness(ctx, entry_func); ++ transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); ++ + loop_unrolling_execute(ctx, body); + hlsl_run_const_passes(ctx, body); + +@@ -12719,13 +13328,21 @@ static void process_entry_function(struct hlsl_ctx *ctx, + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + +- compute_liveness(ctx, entry_func); +- transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); +- + if (hlsl_version_lt(ctx, 4, 0)) + hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); + + hlsl_transform_ir(ctx, validate_dereferences, body, 
NULL); ++ ++ do ++ { ++ progress = vectorize_exprs(ctx, body); ++ compute_liveness(ctx, entry_func); ++ progress |= hlsl_transform_ir(ctx, dce, body, NULL); ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress |= vectorize_stores(ctx, body); ++ } while (progress); ++ + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + + if (hlsl_version_ge(ctx, 4, 0)) +@@ -12847,7 +13464,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + if (profile->major_version < 4) + { + mark_indexable_vars(ctx, entry_func); +- allocate_temp_registers(ctx, entry_func); + allocate_const_registers(ctx, entry_func); + sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); + allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 3a784c71388..72cf53761e4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -470,6 +470,80 @@ static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_d + return false; + } + ++static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_instruction *ins, *ins2; ++ unsigned int tmp_idx = ~0u; ++ unsigned int i, k, r; ++ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (ins->opcode == VKD3DSIH_MOV && ins->dst[0].reg.type == VKD3DSPR_ADDR) ++ { ++ if (tmp_idx == ~0u) ++ tmp_idx = program->temp_count++; ++ ++ ins->opcode = VKD3DSIH_FTOU; ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins->dst[0].reg.idx[0].offset = tmp_idx; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ } ++ else if (ins->opcode == VKD3DSIH_MOVA) ++ { ++ if (tmp_idx == ~0u) ++ tmp_idx = program->temp_count++; ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[i]; ++ ins2 = &program->instructions.elements[i + 1]; ++ ++ ins->opcode = VKD3DSIH_ROUND_NE; ++ vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = tmp_idx; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ if (!vsir_instruction_init_with_params(program, ins2, &ins->location, VKD3DSIH_FTOU, 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ vsir_register_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ ins2->dst[0].reg.idx[0].offset = tmp_idx; ++ ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins2->dst[0].write_mask = ins->dst[0].write_mask; ++ ++ vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ ins2->src[0].reg.idx[0].offset = tmp_idx; ++ ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins2->src[0].swizzle = vsir_swizzle_from_writemask(ins2->dst[0].write_mask); ++ } ++ ++ for (k = 0; k < ins->src_count; ++k) ++ { ++ struct vkd3d_shader_src_param *src = &ins->src[k]; ++ ++ for (r = 0; r < src->reg.idx_count; ++r) ++ { ++ struct vkd3d_shader_src_param *rel = src->reg.idx[r].rel_addr; ++ ++ if (rel && rel->reg.type == VKD3DSPR_ADDR) ++ { ++ if (tmp_idx == ~0u) ++ tmp_idx = program->temp_count++; ++ ++ vsir_register_init(&rel->reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); ++ rel->reg.idx[0].offset = tmp_idx; ++ rel->reg.dimension = VSIR_DIMENSION_VEC4; 
++ } ++ } ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, + struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, + struct vkd3d_shader_message_context *message_context) +@@ -482,6 +556,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ ifc = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -535,6 +610,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program + + if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ texkill = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -621,6 +697,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ mad = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -665,6 +742,7 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ sincos = &instructions->elements[pos]; + + ins = &instructions->elements[pos + 1]; + +@@ -717,6 +795,7 @@ static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; ++ tex = &instructions->elements[pos]; + + if (*tmp_idx == ~0u) + *tmp_idx = program->temp_count++; +@@ -1128,6 +1207,7 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra + if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &program->instructions.elements[i]; ++ + vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = 0; +@@ -1346,7 +1426,6 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + loc = ins->location; + if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- + ins = &program->instructions.elements[i]; + + for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) +@@ -2426,7 +2505,8 @@ struct flat_constants_normaliser + }; + + static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, +- enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) ++ enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index, ++ struct vkd3d_shader_src_param **rel_addr) + { + static const struct + { +@@ -2446,12 +2526,8 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * + { + if (reg->type == regs[i].type) + { +- if (reg->idx[0].rel_addr) +- { +- FIXME("Unhandled relative address.\n"); +- return false; +- } +- ++ if (rel_addr) ++ *rel_addr = reg->idx[0].rel_addr; + *set = regs[i].set; + *index = reg->idx[0].offset; + return true; +@@ -2465,10 +2541,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par + const struct flat_constants_normaliser 
*normaliser)
+ {
+     enum vkd3d_shader_d3dbc_constant_register set;
++    struct vkd3d_shader_src_param *rel_addr;
+     uint32_t index;
+     size_t i, j;
+
+-    if (!get_flat_constant_register_type(&param->reg, &set, &index))
++    if (!get_flat_constant_register_type(&param->reg, &set, &index, &rel_addr))
+         return;
+
+     for (i = 0; i < normaliser->def_count; ++i)
+@@ -2486,8 +2563,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par
+
+     param->reg.type = VKD3DSPR_CONSTBUFFER;
+     param->reg.idx[0].offset = set; /* register ID */
++    param->reg.idx[0].rel_addr = NULL;
+     param->reg.idx[1].offset = set; /* register index */
++    param->reg.idx[1].rel_addr = NULL;
+     param->reg.idx[2].offset = index; /* buffer index */
++    param->reg.idx[2].rel_addr = rel_addr;
+     param->reg.idx_count = 3;
+ }
+
+@@ -2514,7 +2594,7 @@ static enum vkd3d_result vsir_program_normalise_flat_constants(struct vsir_progr
+
+             def = &normaliser.defs[normaliser.def_count++];
+
+-            get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index);
++            get_flat_constant_register_type(&ins->dst[0].reg, &def->set, &def->index, NULL);
+             for (j = 0; j < 4; ++j)
+                 def->value[j] = ins->src[0].reg.u.immconst_u32[j];
+
+@@ -6037,6 +6117,7 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
+         uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context)
+ {
+     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
++    const struct vkd3d_shader_location loc = ret->location;
+     static const struct vkd3d_shader_location no_loc;
+     size_t pos = ret - instructions->elements;
+     struct vkd3d_shader_instruction *ins;
+@@ -6061,9 +6142,10 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
+     {
+         if (!shader_instruction_array_insert_at(&program->instructions, pos, 1))
+             return VKD3D_ERROR_OUT_OF_MEMORY;
++        ret = NULL;
+         ins = &program->instructions.elements[pos];
+
+-        vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1);
++        vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1);
+         ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z;
+         src_param_init_const_uint(&ins->src[0], 0);
+
+@@ -6073,20 +6155,20 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
+
+     if (!shader_instruction_array_insert_at(&program->instructions, pos, 3))
+         return VKD3D_ERROR_OUT_OF_MEMORY;
+-
++    ret = NULL;
+     ins = &program->instructions.elements[pos];
+
+     switch (ref->data_type)
+     {
+         case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32:
+-            vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2);
++            vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].float_opcode, 1, 2);
+             src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp);
+             src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1],
+                     VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT);
+             break;
+
+         case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32:
+-            vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2);
++            vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].uint_opcode, 1, 2);
+             src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp);
+             src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1],
+                     VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT);
+@@ -6107,14 +6189,14 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr
+     ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W);
+
+     ++ins;
+-    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1);
++    vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1);
+     ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z;
+     src_param_init_ssa_bool(&ins->src[0], program->ssa_count);
+
+     ++program->ssa_count;
+
+     ++ins;
+-    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
++    vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
+     vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
+     ins->dst[0].reg.idx[0].offset = colour_signature_idx;
+     ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+@@ -6215,13 +6297,14 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
+         uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos)
+ {
+     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
++    const struct vkd3d_shader_location loc = ret->location;
+     size_t pos = ret - instructions->elements;
+     struct vkd3d_shader_instruction *ins;
+     unsigned int output_idx = 0;
+
+     if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1))
+         return VKD3D_ERROR_OUT_OF_MEMORY;
+-
++    ret = NULL;
+     ins = &program->instructions.elements[pos];
+
+     for (unsigned int i = 0; i < 8; ++i)
+@@ -6229,7 +6312,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
+         if (!(mask & (1u << i)))
+             continue;
+
+-        vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2);
++        vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DP4, 1, 2);
+         src_param_init_temp_float4(&ins->src[0], position_temp);
+         src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT);
+         ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
+@@ -6247,7 +6330,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
+         ++ins;
+     }
+
+-    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
++    vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
+     vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1);
+     ins->dst[0].reg.idx[0].offset = position_signature_idx;
+     ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+@@ -6404,15 +6487,16 @@ static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *progr
+         const struct vkd3d_shader_instruction *ret, size_t *ret_pos)
+ {
+     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
++    const struct vkd3d_shader_location loc = ret->location;
+     size_t pos = ret - instructions->elements;
+     struct vkd3d_shader_instruction *ins;
+
+     if (!shader_instruction_array_insert_at(&program->instructions, pos, 1))
+         return VKD3D_ERROR_OUT_OF_MEMORY;
+-
++    ret = NULL;
+     ins = &program->instructions.elements[pos];
+
+-    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
++    vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
+     vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1);
+     ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE;
+     src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT);
+@@ -6541,9 +6625,9 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra
+
+         if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter))
+             return VKD3D_ERROR_OUT_OF_MEMORY;
++        ins = &program->instructions.elements[i + 1];
+
+         loc = &program->instructions.elements[i].location;
+-        ins = &program->instructions.elements[i + 1];
+
+         if (min_parameter)
+         {
+@@ -6741,7 +6825,6 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr
+     {
+         if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2))
+             return VKD3D_ERROR_OUT_OF_MEMORY;
+-
+         ins = &program->instructions.elements[insert_pos];
+
+         vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
+@@ -6815,6 +6898,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
+              */
+             if (!shader_instruction_array_insert_at(&program->instructions, pos, 4))
+                 return VKD3D_ERROR_OUT_OF_MEMORY;
++            ret = NULL;
++
+             *ret_pos = pos + 4;
+
+             ssa_temp = program->ssa_count++;
+@@ -6845,6 +6930,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
+              */
+             if (!shader_instruction_array_insert_at(&program->instructions, pos, 4))
+                 return VKD3D_ERROR_OUT_OF_MEMORY;
++            ret = NULL;
++
+             *ret_pos = pos + 4;
+
+             ssa_temp = program->ssa_count++;
+@@ -6875,6 +6962,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro
+              */
+             if (!shader_instruction_array_insert_at(&program->instructions, pos, 5))
+                 return VKD3D_ERROR_OUT_OF_MEMORY;
++            ret = NULL;
++
+             *ret_pos = pos + 5;
+
+             ssa_temp = program->ssa_count++;
+@@ -7053,16 +7142,18 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr
+ {
+     const struct signature_element *e = &program->output_signature.elements[source_signature_idx];
+     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
++    const struct vkd3d_shader_location loc = ret->location;
+     size_t pos = ret - instructions->elements;
+     struct vkd3d_shader_instruction *ins;
+
+     if (!shader_instruction_array_insert_at(&program->instructions, pos, 2))
+         return VKD3D_ERROR_OUT_OF_MEMORY;
++    ret = NULL;
+
+     ins = &program->instructions.elements[pos];
+
+     /* Write the fog output. */
+-    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
++    vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
+     dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1);
+     src_param_init_temp_float4(&ins->src[0], temp);
+     if (source == VKD3D_SHADER_FOG_SOURCE_Z)
+@@ -7072,7 +7163,7 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr
+     ++ins;
+
+     /* Write the position or specular output. */
+-    vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1);
++    vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
+     dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type),
+             source_signature_idx, e->mask);
+     src_param_init_temp_float4(&ins->src[0], temp);
+@@ -7707,6 +7798,33 @@ static void vsir_validate_label_register(struct validation_context *ctx,
+                 reg->idx[0].offset, ctx->program->block_count);
+ }
+
++static void vsir_validate_descriptor_indices(struct validation_context *ctx,
++        const struct vkd3d_shader_register *reg, enum vkd3d_shader_descriptor_type type, const char *name)
++{
++    const struct vkd3d_shader_descriptor_info1 *descriptor;
++
++    if (reg->idx[0].rel_addr)
++        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
++                "Non-NULL indirect address for the ID of a register of type \"%s\".", name);
++
++    if (!ctx->program->has_descriptor_info)
++        return;
++
++    if (!(descriptor = vkd3d_shader_find_descriptor(&ctx->program->descriptors, type, reg->idx[0].offset)))
++    {
++        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
++                "No matching descriptor found for register %s%u.", name, reg->idx[0].offset);
++        return;
++    }
++
++    if (!reg->idx[1].rel_addr && (reg->idx[1].offset < descriptor->register_index
++            || reg->idx[1].offset - descriptor->register_index >= descriptor->count))
++        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
++                "Register index %u doesn't belong to the range [%u, %u] for register %s%u.",
++                reg->idx[1].offset, descriptor->register_index,
++                descriptor->register_index + descriptor->count - 1, name, reg->idx[0].offset);
++}
++
+ static void vsir_validate_constbuffer_register(struct validation_context *ctx,
+         const struct vkd3d_shader_register *reg)
+ {
+@@ -7725,9 +7843,7 @@ static void vsir_validate_constbuffer_register(struct validation_context *ctx,
+         return;
+     }
+
+-    if (reg->idx[0].rel_addr)
+-        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+-                "Non-NULL relative address for a CONSTBUFFER register ID.");
++    vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, "cb");
+ }
+
+ static void vsir_validate_sampler_register(struct validation_context *ctx,
+@@ -7753,9 +7869,7 @@ static void vsir_validate_sampler_register(struct validation_context *ctx,
+         return;
+     }
+
+-    if (reg->idx[0].rel_addr)
+-        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+-                "Non-NULL relative address for the descriptor index of a SAMPLER register.");
++    vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, "s");
+ }
+
+ static void vsir_validate_resource_register(struct validation_context *ctx,
+@@ -7780,9 +7894,7 @@ static void vsir_validate_resource_register(struct validation_context *ctx,
+         return;
+     }
+
+-    if (reg->idx[0].rel_addr)
+-        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+-                "Non-NULL relative address for the descriptor index of a RESOURCE register.");
++    vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, "t");
+ }
+
+ static void vsir_validate_uav_register(struct validation_context *ctx,
+@@ -7812,9 +7924,7 @@ static void vsir_validate_uav_register(struct validation_context *ctx,
+         return;
+     }
+
+-    if (reg->idx[0].rel_addr)
+-        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+-                "Non-NULL relative address for the descriptor index of a UAV register.");
++    vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, "u");
+ }
+
+ static void vsir_validate_ssa_register(struct validation_context *ctx,
+@@ -9824,6 +9934,9 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t
+     if (program->shader_version.major <= 2)
+         vsir_transform(&ctx, vsir_program_ensure_diffuse);
+
++    if (program->shader_version.major < 4)
++        vsir_transform(&ctx, vsir_program_normalize_addr);
++
+     if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
+         vsir_transform(&ctx, vsir_program_remap_output_signature);
+
+diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c
+index 756b43298d3..a5d952cd525 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/msl.c
++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c
+@@ -1310,6 +1310,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags,
+         return ret;
+
+     VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
++    VKD3D_ASSERT(program->has_descriptor_info);
+
+     if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0)
+         return ret;
+diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
+index 0cc1ceca798..91a6686eb0d 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
+@@ -11361,6 +11361,7 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags,
+         return ret;
+
+     VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6);
++    VKD3D_ASSERT(program->has_descriptor_info);
+
+     if (!(spirv_compiler = spirv_compiler_create(program, compile_info,
+             message_context, config_flags)))
+diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+index 2afeff086e5..9191429c439 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+@@ -1127,7 +1127,7 @@ static void vkd3d_shader_scan_combined_sampler_declaration(
+             &semantic->resource.range, semantic->resource_type, VKD3D_SHADER_RESOURCE_DATA_FLOAT);
+ }
+
+-static const struct vkd3d_shader_descriptor_info1 *find_descriptor(
++const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor(
+         const struct vkd3d_shader_scan_descriptor_info1 *info,
+         enum vkd3d_shader_descriptor_type type, unsigned int register_id)
+ {
+@@ -1181,11 +1181,11 @@ static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_co
+     if (dynamic_resource || dynamic_sampler)
+         return;
+
+-    if ((d = find_descriptor(context->scan_descriptor_info,
++    if ((d = vkd3d_shader_find_descriptor(context->scan_descriptor_info,
+             VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource->idx[0].offset)))
+         resource_space = d->register_space;
+
+-    if (sampler && (d = find_descriptor(context->scan_descriptor_info,
++    if (sampler && (d = vkd3d_shader_find_descriptor(context->scan_descriptor_info,
+             VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler->idx[0].offset)))
+         sampler_space = d->register_space;
+ }
+@@ -1606,6 +1606,9 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh
+     vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info,
+             add_descriptor_info ? &program->descriptors : NULL, combined_sampler_info, message_context);
+
++    if (add_descriptor_info)
++        program->has_descriptor_info = true;
++
+     if (TRACE_ON())
+         vsir_program_trace(program);
+
+@@ -2046,6 +2049,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
+         VKD3D_SHADER_TARGET_SPIRV_BINARY,
+ #if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER)
+         VKD3D_SHADER_TARGET_SPIRV_TEXT,
++#endif
++#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL
++        VKD3D_SHADER_TARGET_GLSL,
+ #endif
+         VKD3D_SHADER_TARGET_D3D_ASM,
+         VKD3D_SHADER_TARGET_D3D_BYTECODE,
+diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+index e794257b9d8..bf794d5e936 100644
+--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+@@ -1444,6 +1444,9 @@ struct vkd3d_shader_scan_descriptor_info1
+     unsigned int descriptor_count;
+ };
+
++const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor(
++        const struct vkd3d_shader_scan_descriptor_info1 *info,
++        enum vkd3d_shader_descriptor_type type, unsigned int register_id);
+ void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info);
+
+ struct vsir_program
+@@ -1456,6 +1459,7 @@ struct vsir_program
+     struct shader_signature patch_constant_signature;
+
+     struct vkd3d_shader_scan_descriptor_info1 descriptors;
++    bool has_descriptor_info;
+
+     unsigned int parameter_count;
+     const struct vkd3d_shader_parameter1 *parameters;
+-- 
+2.47.2
+