From 9d9c5c700a6d49ff585628f8dfbeea15a4cb692a Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sat, 19 Aug 2023 12:24:21 +1000 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch | 3610 +++++++++++++++++ 1 file changed, 3610 insertions(+) create mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch new file mode 100644 index 00000000..7ea60aca --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-4f2e07a45d0cdb82b1cbba0cfe95c87a697.patch @@ -0,0 +1,3610 @@ +From c5de2391c76b56a016df7907ce484035f1ace2b6 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Sat, 19 Aug 2023 10:47:49 +1000 +Subject: [PATCH] Updated vkd3d to 4f2e07a45d0cdb82b1cbba0cfe95c87a69799865. + +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 4 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 18 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 11 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 61 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 128 ++- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 170 +++- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 18 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 865 ++++++++++-------- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 189 ++-- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 25 +- + 10 files changed, 907 insertions(+), 582 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 35e5c454d57..99a5bd7a438 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1686,7 +1686,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + else + { + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); +- put_u32(buffer, var->regs[r].bind_count); ++ put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ + put_u32(buffer, 0); /* FIXME: default value */ +@@ -2033,7 +2033,7 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + if (!var->regs[HLSL_REGSET_SAMPLERS].allocated) + continue; + +- count = var->regs[HLSL_REGSET_SAMPLERS].bind_count; ++ count = var->bind_count[HLSL_REGSET_SAMPLERS]; + + for (i = 0; i < count; ++i) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 4ed7712b0aa..8b706e1e667 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -1544,7 +1544,7 @@ static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, + hlsl_block_cleanup(dst_block); + return false; + } +- list_add_tail(&dst_block->instrs, &dst->entry); ++ hlsl_block_add_instr(dst_block, dst); + + if (!list_empty(&src->uses)) + { +@@ -2244,11 +2244,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) + + static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_node *instr); + +-static void dump_instr_list(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct list *list) ++static void dump_block(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_block *block) + { + struct hlsl_ir_node *instr; + +- LIST_FOR_EACH_ENTRY(instr, list, struct hlsl_ir_node, entry) ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + 
dump_instr(ctx, buffer, instr); + vkd3d_string_buffer_printf(buffer, "\n"); +@@ -2490,9 +2490,9 @@ static void dump_ir_if(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + vkd3d_string_buffer_printf(buffer, "if ("); + dump_src(buffer, &if_node->condition); + vkd3d_string_buffer_printf(buffer, ") {\n"); +- dump_instr_list(ctx, buffer, &if_node->then_block.instrs); ++ dump_block(ctx, buffer, &if_node->then_block); + vkd3d_string_buffer_printf(buffer, " %10s } else {\n", ""); +- dump_instr_list(ctx, buffer, &if_node->else_block.instrs); ++ dump_block(ctx, buffer, &if_node->else_block); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); + } + +@@ -2525,7 +2525,7 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i + static void dump_ir_loop(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_loop *loop) + { + vkd3d_string_buffer_printf(buffer, "for (;;) {\n"); +- dump_instr_list(ctx, buffer, &loop->body.instrs); ++ dump_block(ctx, buffer, &loop->body); + vkd3d_string_buffer_printf(buffer, " %10s }", ""); + } + +@@ -2713,7 +2713,7 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl + vkd3d_string_buffer_printf(&buffer, "\n"); + } + if (func->has_body) +- dump_instr_list(ctx, &buffer, &func->body.instrs); ++ dump_block(ctx, &buffer, &func->body); + + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); +@@ -2922,7 +2922,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) + + for (i = 0; i < attr->args_count; ++i) + hlsl_src_remove(&attr->args[i]); +- hlsl_free_instr_list(&attr->instrs); ++ hlsl_block_cleanup(&attr->instrs); + vkd3d_free((void *)attr->name); + vkd3d_free(attr); + } +@@ -3377,6 +3377,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + struct hlsl_type *type, *next_type; + unsigned int i; + ++ hlsl_block_cleanup(&ctx->static_initializers); ++ + for (i = 0; i < ctx->source_files_count; ++i) + vkd3d_free((void *)ctx->source_files[i]); + vkd3d_free(ctx->source_files); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index b1928312066..0a8d3a692a3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -257,7 +257,7 @@ struct hlsl_reg + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ +- uint32_t bind_count; ++ uint32_t allocation_size; + /* For numeric registers, a writemask can be provided to indicate the reservation of only some + * of the 4 components. */ + unsigned int writemask; +@@ -337,7 +337,7 @@ struct hlsl_src + struct hlsl_attribute + { + const char *name; +- struct list instrs; ++ struct hlsl_block instrs; + struct vkd3d_shader_location loc; + unsigned int args_count; + struct hlsl_src args[]; +@@ -417,6 +417,9 @@ struct hlsl_ir_var + enum hlsl_sampler_dim sampler_dim; + struct vkd3d_shader_location first_sampler_dim_loc; + } *objects_usage[HLSL_REGSET_LAST_OBJECT + 1]; ++ /* Minimum number of binds required to include all object components actually used in the shader. ++ * It may be less than the allocation size, e.g. for texture arrays. 
*/ ++ unsigned int bind_count[HLSL_REGSET_LAST_OBJECT + 1]; + + uint32_t is_input_semantic : 1; + uint32_t is_output_semantic : 1; +@@ -1150,7 +1153,7 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); + + struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); +@@ -1251,7 +1254,7 @@ int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun + bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, + const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); ++ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx); + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); + + int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 0695f7864bf..29e0ff0c5be 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -138,14 +138,6 @@ static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) + return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); + } + +-static struct list *block_to_list(struct hlsl_block *block) +-{ +- /* This is a temporary hack to ease the transition from lists to blocks. +- * It takes advantage of the fact that an allocated hlsl_block pointer is +- * byte-compatible with an allocated list pointer. 
*/ +- return &block->instrs; +-} +- + static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) + { + struct hlsl_block *block; +@@ -351,7 +343,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl + + dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); + +- if (!(component_load = hlsl_add_load_component(ctx, block_to_list(block), node, src_idx, loc))) ++ if (!(component_load = hlsl_add_load_component(ctx, block, node, src_idx, loc))) + return NULL; + + if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) +@@ -677,11 +669,11 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, + return true; + } + +-struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +- unsigned int comp, const struct vkd3d_shader_location *loc) ++struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *load, *store; +- struct hlsl_block block; ++ struct hlsl_block load_block; + struct hlsl_ir_var *var; + struct hlsl_deref src; + +@@ -690,12 +682,12 @@ struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list * + + if (!(store = hlsl_new_simple_store(ctx, var, var_instr))) + return NULL; +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + + hlsl_init_simple_deref_from_var(&src, var); +- if (!(load = hlsl_new_load_component(ctx, &block, &src, comp, loc))) ++ if (!(load = hlsl_new_load_component(ctx, &load_block, &src, comp, loc))) + return NULL; +- list_move_tail(instrs, &block.instrs); ++ hlsl_block_add_block(block, &load_block); + + return load; + } +@@ -1340,7 +1332,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl + { + if (operands[j]) + { +- if (!(load = hlsl_add_load_component(ctx, block_to_list(block), operands[j], i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, block, operands[j], i, loc))) + return NULL; + + cell_operands[j] = load; +@@ -1822,7 +1814,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + return NULL; + hlsl_block_add_instr(block, cell); + +- if (!(load = hlsl_add_load_component(ctx, block_to_list(block), rhs, k++, &rhs->loc))) ++ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) + return NULL; + + if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) +@@ -1911,7 +1903,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + struct hlsl_type *dst_comp_type; + struct hlsl_block block; + +- if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, k, &src->loc))) ++ if (!(load = hlsl_add_load_component(ctx, instrs, src, k, &src->loc))) + return; + + dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); +@@ -2139,6 +2131,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + if (var->semantic.name) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Semantics are not allowed on local variables."); ++ ++ if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count && !(modifiers & HLSL_STORAGE_STATIC)) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, ++ "Const variable \"%s\" is missing an initializer.", var->name); ++ } + } + + if ((var->storage_modifiers & 
HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) +@@ -2148,15 +2146,6 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + "Static variables cannot have both numeric and resource components."); + } + +- if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count +- && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) +- { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, +- "Const variable \"%s\" is missing an initializer.", var->name); +- hlsl_free_var(var); +- return; +- } +- + if (!hlsl_add_var(ctx, var, local)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); +@@ -2469,7 +2458,7 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { +- if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) +@@ -2513,7 +2502,7 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, + count = hlsl_type_component_count(arg->data_type); + for (i = 0; i < count; ++i) + { +- if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, i, loc))) + return false; + + if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) +@@ -3170,11 +3159,11 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + { + struct hlsl_ir_node *value1, *value2, *mul; + +- if (!(value1 = hlsl_add_load_component(ctx, block_to_list(params->instrs), ++ if (!(value1 = hlsl_add_load_component(ctx, params->instrs, + cast1, j * cast1->data_type->dimx + k, loc))) + return false; + +- if (!(value2 = hlsl_add_load_component(ctx, block_to_list(params->instrs), ++ if (!(value2 = hlsl_add_load_component(ctx, params->instrs, + cast2, k * cast2->data_type->dimx + i, loc))) + return false; + +@@ -3531,7 +3520,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, + { + struct hlsl_block block; + +- if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, j * arg->data_type->dimx + i, loc))) ++ if (!(load = hlsl_add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + return false; + + if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) +@@ -4193,7 +4182,7 @@ static bool add_assignment_from_component(struct hlsl_ctx *ctx, struct hlsl_bloc + if (!dest) + return true; + +- if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, component, loc))) ++ if (!(load = hlsl_add_load_component(ctx, instrs, src, component, loc))) + return false; + + if (!add_assignment(ctx, instrs, dest, ASSIGN_OP_ASSIGN, load)) +@@ -5009,7 +4998,7 @@ attribute: + YYABORT; + } + $$->name = $2; +- list_init(&$$->instrs); ++ hlsl_block_init(&$$->instrs); + $$->loc = @$; + $$->args_count = 0; + } +@@ -5024,8 +5013,8 @@ attribute: + YYABORT; + } + $$->name = $2; +- list_init(&$$->instrs); +- list_move_tail(&$$->instrs, &$4.instrs->instrs); ++ hlsl_block_init(&$$->instrs); ++ hlsl_block_add_block(&$$->instrs, $4.instrs); + vkd3d_free($4.instrs); + $$->loc = @$; + $$->args_count = $4.args_count; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 4f5a5b02a67..bfa605f4ba7 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -161,7 +161,7 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der + /* Split uniforms into two variables representing the constant and temp + * registers, and copy the former to the latter, so that writes to uniforms + * work. */ +-static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *temp) ++static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *temp) + { + struct vkd3d_string_buffer *name; + struct hlsl_ir_var *uniform; +@@ -188,7 +188,7 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct list *instrs, stru + + if (!(load = hlsl_new_var_load(ctx, uniform, &temp->loc))) + return; +- list_add_head(instrs, &load->node.entry); ++ list_add_head(&block->instrs, &load->node.entry); + + if (!(store = hlsl_new_simple_store(ctx, temp, &load->node))) + return; +@@ -301,7 +301,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + return ext_var; + } + +-static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, ++static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; +@@ -364,7 +364,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + } + } + +-static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *lhs, ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &lhs->node.loc; +@@ -406,30 +406,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs + return; + list_add_after(&c->entry, &element_load->node.entry); + +- prepend_input_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ prepend_input_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { +- prepend_input_copy(ctx, instrs, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); + } + } + + /* Split inputs into two variables representing the semantic and temp registers, + * and copy the former to the latter, so that writes to input variables work. */ +-static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) ++static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- list_add_head(instrs, &load->node.entry); ++ list_add_head(&block->instrs, &load->node.entry); + +- prepend_input_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + +-static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, ++static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct hlsl_type *type = rhs->node.data_type, *vector_type; +@@ -464,11 +464,11 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + { + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, c, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + } + else + { +@@ -476,16 +476,16 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct list *instrs, struct + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + } + + if (!(store = hlsl_new_simple_store(ctx, output, &load->node))) + return; +- list_add_tail(instrs, &store->entry); ++ hlsl_block_add_instr(block, store); + } + } + +-static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_load *rhs, ++static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, + unsigned int modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) + { + struct vkd3d_shader_location *loc = &rhs->node.loc; +@@ -520,34 +520,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct list *instrs + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) + return; +- list_add_tail(instrs, &c->entry); ++ hlsl_block_add_instr(block, c); + + if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc))) + return; +- list_add_tail(instrs, &element_load->node.entry); ++ hlsl_block_add_instr(block, &element_load->node); + +- append_output_copy_recurse(ctx, instrs, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); + } + } + else + { +- append_output_copy(ctx, instrs, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); + } + } + + /* Split outputs into two variables representing the temp and semantic + * registers, and copy the former to the latter, so that reads from output + * variables work. */ +-static void append_output_var_copy(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_var *var) ++static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *var) + { + struct hlsl_ir_load *load; + + /* This redundant load is expected to be deleted later by DCE. 
*/ + if (!(load = hlsl_new_var_load(ctx, var, &var->loc))) + return; +- list_add_tail(instrs, &load->node.entry); ++ hlsl_block_add_instr(block, &load->node); + +- append_output_copy_recurse(ctx, instrs, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); + } + + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), +@@ -2191,6 +2191,44 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + return true; + } + ++static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add, ++ enum hlsl_regset regset) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->bind_count[regset] < to_add->bind_count[regset]) ++ { ++ list_add_before(&var->extern_entry, &to_add->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(list, &to_add->extern_entry); ++} ++ ++static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) ++{ ++ struct list separated_resources; ++ struct hlsl_ir_var *var, *next; ++ ++ list_init(&separated_resources); ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_separated_resource) ++ { ++ list_remove(&var->extern_entry); ++ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES); ++ } ++ } ++ ++ list_move_head(&ctx->extern_vars, &separated_resources); ++ ++ return false; ++} ++ + /* Lower DIV to RCP + MUL. */ + static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +@@ -2738,7 +2776,7 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + count = hlsl_type_component_count(cmp_type); + for (i = 0; i < count; ++i) + { +- if (!(load = hlsl_add_load_component(ctx, &block.instrs, cmp, i, &instr->loc))) ++ if (!(load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc))) + return false; + + if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) +@@ -2868,7 +2906,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + continue; + regset = hlsl_type_get_regset(var->data_type); + +- if (var->reg_reservation.reg_type && var->regs[regset].bind_count) ++ if (var->reg_reservation.reg_type && var->regs[regset].allocation_size) + { + if (var->reg_reservation.reg_type != get_regset_name(regset)) + { +@@ -2886,7 +2924,7 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + var->regs[regset].id = var->reg_reservation.reg_index; + TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, + var->reg_reservation.reg_index, var->reg_reservation.reg_type, +- var->reg_reservation.reg_index + var->regs[regset].bind_count); ++ var->reg_reservation.reg_index + var->regs[regset].allocation_size); + } + } + } +@@ -3144,7 +3182,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); + + ret.id = reg_idx; +- ret.bind_count = 1; ++ ret.allocation_size = 1; + ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); + ret.allocated = true; + return ret; +@@ -3180,7 +3218,7 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + record_allocation(ctx, allocator, reg_idx + i, 
VKD3DSP_WRITEMASK_ALL, first_write, last_read); + + ret.id = reg_idx; +- ret.bind_count = align(reg_size, 4) / 4; ++ ret.allocation_size = align(reg_size, 4) / 4; + ret.allocated = true; + return ret; + } +@@ -3275,6 +3313,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n + return false; + + var->objects_usage[regset][index].used = true; ++ var->bind_count[regset] = max(var->bind_count[regset], index + 1); + if (load->sampler.var) + { + var = load->sampler.var; +@@ -3282,6 +3321,7 @@ static bool track_object_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_n + return false; + + var->objects_usage[HLSL_REGSET_SAMPLERS][index].used = true; ++ var->bind_count[HLSL_REGSET_SAMPLERS] = max(var->bind_count[HLSL_REGSET_SAMPLERS], index + 1); + } + + return false; +@@ -3291,7 +3331,7 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) + { + struct hlsl_ir_var *var; + struct hlsl_type *type; +- unsigned int i, k; ++ unsigned int k; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +@@ -3299,15 +3339,10 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) + + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + { +- for (i = 0; i < type->reg_size[k]; ++i) +- { +- bool is_separated = var->is_separated_resource; ++ bool is_separated = var->is_separated_resource; + +- /* Samplers (and textures separated from them) are only allocated until the last +- * used one. */ +- if (var->objects_usage[k][i].used) +- var->regs[k].bind_count = (k == HLSL_REGSET_SAMPLERS || is_separated) ? i + 1 : type->reg_size[k]; +- } ++ if (var->bind_count[k] > 0) ++ var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? var->bind_count[k] : type->reg_size[k]; + } + } + } +@@ -3613,7 +3648,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + { + var->regs[HLSL_REGSET_NUMERIC].allocated = true; + var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; +- var->regs[HLSL_REGSET_NUMERIC].bind_count = 1; ++ var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; + var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; + TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 
'o' : 'v', + var->regs[HLSL_REGSET_NUMERIC], var->data_type)); +@@ -3792,7 +3827,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + } + + buffer->reg.id = buffer->reservation.reg_index; +- buffer->reg.bind_count = 1; ++ buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; + TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); + } +@@ -3802,7 +3837,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + ++index; + + buffer->reg.id = index; +- buffer->reg.bind_count = 1; ++ buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; + TRACE("Allocated %s to cb%u.\n", buffer->name, index); + ++index; +@@ -3842,7 +3877,7 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + else if (var->regs[regset].allocated) + { + start = var->regs[regset].id; +- count = var->regs[regset].bind_count; ++ count = var->regs[regset].allocation_size; + } + else + { +@@ -3873,7 +3908,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- unsigned int count = var->regs[regset].bind_count; ++ unsigned int count = var->regs[regset].allocation_size; + + if (count == 0) + continue; +@@ -4221,7 +4256,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry) + { + if (var->storage_modifiers & HLSL_STORAGE_UNIFORM) +- prepend_uniform_copy(ctx, &body->instrs, var); ++ prepend_uniform_copy(ctx, body, var); + } + + for (i = 0; i < entry_func->parameters.count; ++i) +@@ -4230,7 +4265,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + + if (hlsl_type_is_resource(var->data_type) || (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { +- prepend_uniform_copy(ctx, &body->instrs, var); ++ prepend_uniform_copy(ctx, body, var); + } + else + { +@@ -4246,9 +4281,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } + + if (var->storage_modifiers & HLSL_STORAGE_IN) +- prepend_input_var_copy(ctx, &body->instrs, var); ++ prepend_input_var_copy(ctx, body, var); + if (var->storage_modifiers & HLSL_STORAGE_OUT) +- append_output_var_copy(ctx, &body->instrs, var); ++ append_output_var_copy(ctx, body, var); + } + } + if (entry_func->return_var) +@@ -4257,7 +4292,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, + "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); + +- append_output_var_copy(ctx, &body->instrs, entry_func->return_var); ++ append_output_var_copy(ctx, body, entry_func->return_var); + } + + for (i = 0; i < entry_func->attr_count; ++i) +@@ -4316,6 +4351,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + hlsl_transform_ir(ctx, track_object_components_usage, body, NULL); ++ sort_synthetic_separated_samplers_first(ctx); + + if (profile->major_version < 4) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 01c438ae212..41a72ab6c0d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -152,6 +152,51 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct 
hlsl_constant_value *dst, + return true; + } + ++static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, ++ "Indefinite logarithm result."); ++ } ++ dst->u[k].f = log2f(src->value.u[k].f); ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (src->value.u[k].d < 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT, ++ "Indefinite logarithm result."); ++ } ++ dst->u[k].d = log2(src->value.u[k].d); ++ break; ++ ++ default: ++ FIXME("Fold 'log2' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) + { +@@ -194,7 +239,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + + assert(type == src->node.data_type->base_type); + +- for (k = 0; k < 4; ++k) ++ for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { +@@ -231,6 +276,51 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + return true; + } + ++static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src->node.data_type->base_type); ++ ++ for (k = 0; k < dst_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (ctx->profile->major_version >= 4 && src->value.u[k].f < 0.0f) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, ++ "Imaginary square root result."); ++ } ++ dst->u[k].f = sqrtf(src->value.u[k].f); ++ if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT, ++ "Infinities and NaNs are not allowed by the shader model."); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ if (src->value.u[k].d < 0.0) ++ { ++ hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT, ++ "Imaginary square root result."); ++ } ++ dst->u[k].d = sqrt(src->value.u[k].d); ++ break; ++ ++ default: ++ FIXME("Fold 'sqrt' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) + { +@@ -348,6 +438,64 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + return true; + } + ++static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct 
hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ ++ dst->u[0].f = 0.0f; ++ for (k = 0; k < src1->node.data_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ default: ++ FIXME("Fold 'dot' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, ++ const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, const struct hlsl_ir_constant *src3) ++{ ++ enum hlsl_base_type type = dst_type->base_type; ++ unsigned int k; ++ ++ assert(type == src1->node.data_type->base_type); ++ assert(type == src2->node.data_type->base_type); ++ assert(type == src3->node.data_type->base_type); ++ assert(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ assert(src3->node.data_type->dimx == 1); ++ ++ dst->u[0].f = src3->value.u[0].f; ++ for (k = 0; k < src1->node.data_type->dimx; ++k) ++ { ++ switch (type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ dst->u[0].f += src1->value.u[k].f * src2->value.u[k].f; ++ break; ++ default: ++ FIXME("Fold 'dp2add' for type %s.\n", debug_hlsl_type(ctx, dst_type)); ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) +@@ -723,7 +871,7 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { +- struct hlsl_ir_constant *arg1, *arg2 = NULL; ++ struct hlsl_ir_constant *arg1, *arg2 = NULL, *arg3 = NULL; + struct hlsl_constant_value res = {0}; + struct hlsl_ir_node *res_node; + struct hlsl_ir_expr *expr; +@@ -751,6 +899,8 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + arg1 = hlsl_ir_constant(expr->operands[0].node); + if (expr->operands[1].node) + arg2 = hlsl_ir_constant(expr->operands[1].node); ++ if (expr->operands[2].node) ++ arg3 = hlsl_ir_constant(expr->operands[2].node); + + switch (expr->op) + { +@@ -762,6 +912,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_cast(ctx, &res, instr->data_type, arg1); + break; + ++ case HLSL_OP1_LOG2: ++ success = fold_log2(ctx, &res, instr->data_type, arg1, &instr->loc); ++ break; ++ + case HLSL_OP1_NEG: + success = fold_neg(ctx, &res, instr->data_type, arg1); + break; +@@ -770,6 +924,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + ++ case HLSL_OP1_SQRT: ++ success = fold_sqrt(ctx, &res, instr->data_type, arg1, &instr->loc); ++ break; ++ + case HLSL_OP2_ADD: + success = fold_add(ctx, &res, instr->data_type, arg1, arg2); + break; +@@ -788,6 +946,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + break; + ++ case 
HLSL_OP2_DOT: ++ success = fold_dot(ctx, &res, instr->data_type, arg1, arg2); ++ break; ++ + case HLSL_OP2_DIV: + success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + break; +@@ -824,6 +986,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + break; + ++ case HLSL_OP3_DP2ADD: ++ success = fold_dp2add(ctx, &res, instr->data_type, arg1, arg2, arg3); ++ break; ++ + default: + FIXME("Fold \"%s\" expression.\n", debug_hlsl_expr_op(expr->op)); + success = false; +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 2725ed80cd1..fa605f185ae 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -2310,7 +2310,7 @@ struct spirv_compiler + + uint32_t binding_idx; + +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + unsigned int input_control_point_count; + unsigned int output_control_point_count; + bool use_vocp; +@@ -2380,7 +2380,7 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) + + static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, + struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) + { + const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; +@@ -5695,13 +5695,13 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty + } + } + +-static const struct vkd3d_shader_descriptor_info *spirv_compiler_get_descriptor_info( ++static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( + struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, + const struct vkd3d_shader_register_range *range) + { +- const struct vkd3d_shader_scan_descriptor_info *descriptor_info = compiler->scan_descriptor_info; ++ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; + unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + unsigned int i; + + for (i = 0; i < descriptor_info->descriptor_count; ++i) +@@ -5721,7 +5721,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler + bool raw_structured, uint32_t depth) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + bool uav_read, uav_atomics; + uint32_t sampled_type_id; + SpvImageFormat format; +@@ -5756,7 +5756,7 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi + const struct vkd3d_shader_combined_resource_sampler *current; + uint32_t image_type_id, type_id, ptr_type_id, var_id; + enum vkd3d_shader_binding_flag resource_type_flag; +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + struct vkd3d_symbol symbol; + unsigned int i; + bool depth; +@@ -5889,7 +5889,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + + if (is_uav) + { +- const struct vkd3d_shader_descriptor_info *d; ++ const struct vkd3d_shader_descriptor_info1 *d; + + d = spirv_compiler_get_descriptor_info(compiler, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, &resource->range); +@@ -9635,7 +9635,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, + } + + int spirv_compile(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 351943e2e53..550f9b27cc7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -430,6 +430,8 @@ enum vkd3d_sm4_register_type + VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL = 0x26, + VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL = 0x27, + VKD3D_SM5_RT_OUTPUT_STENCIL_REF = 0x29, ++ ++ VKD3D_SM4_REGISTER_TYPE_COUNT, + }; + + enum vkd3d_sm4_extended_operand_type +@@ -571,6 +573,12 @@ struct sm4_index_range_array + struct sm4_index_range ranges[MAX_REG_OUTPUT * 2]; + }; + ++struct vkd3d_sm4_lookup_tables ++{ ++ const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; ++ const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; ++}; ++ + struct vkd3d_shader_sm4_parser + { + const uint32_t *start, *end, *ptr; +@@ -587,6 +595,8 @@ struct vkd3d_shader_sm4_parser + struct sm4_index_range_array output_index_ranges; + struct sm4_index_range_array patch_constant_index_ranges; + ++ struct vkd3d_sm4_lookup_tables lookup; ++ + struct vkd3d_shader_parser p; + }; + +@@ -1468,50 +1478,10 @@ static const struct vkd3d_sm4_opcode_info opcode_table[] = + {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, + }; + +-static const enum vkd3d_shader_register_type register_type_table[] = +-{ +- /* VKD3D_SM4_RT_TEMP */ VKD3DSPR_TEMP, +- /* VKD3D_SM4_RT_INPUT */ VKD3DSPR_INPUT, +- /* VKD3D_SM4_RT_OUTPUT */ VKD3DSPR_OUTPUT, +- /* VKD3D_SM4_RT_INDEXABLE_TEMP */ VKD3DSPR_IDXTEMP, +- /* VKD3D_SM4_RT_IMMCONST */ VKD3DSPR_IMMCONST, +- /* VKD3D_SM4_RT_IMMCONST64 */ VKD3DSPR_IMMCONST64, +- /* VKD3D_SM4_RT_SAMPLER */ 
VKD3DSPR_SAMPLER, +- /* VKD3D_SM4_RT_RESOURCE */ VKD3DSPR_RESOURCE, +- /* VKD3D_SM4_RT_CONSTBUFFER */ VKD3DSPR_CONSTBUFFER, +- /* VKD3D_SM4_RT_IMMCONSTBUFFER */ VKD3DSPR_IMMCONSTBUFFER, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM4_RT_PRIMID */ VKD3DSPR_PRIMID, +- /* VKD3D_SM4_RT_DEPTHOUT */ VKD3DSPR_DEPTHOUT, +- /* VKD3D_SM4_RT_NULL */ VKD3DSPR_NULL, +- /* VKD3D_SM4_RT_RASTERIZER */ VKD3DSPR_RASTERIZER, +- /* VKD3D_SM4_RT_OMASK */ VKD3DSPR_SAMPLEMASK, +- /* VKD3D_SM5_RT_STREAM */ VKD3DSPR_STREAM, +- /* VKD3D_SM5_RT_FUNCTION_BODY */ VKD3DSPR_FUNCTIONBODY, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_FUNCTION_POINTER */ VKD3DSPR_FUNCTIONPOINTER, +- /* UNKNOWN */ ~0u, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID */ VKD3DSPR_OUTPOINTID, +- /* VKD3D_SM5_RT_FORK_INSTANCE_ID */ VKD3DSPR_FORKINSTID, +- /* VKD3D_SM5_RT_JOIN_INSTANCE_ID */ VKD3DSPR_JOININSTID, +- /* VKD3D_SM5_RT_INPUT_CONTROL_POINT */ VKD3DSPR_INCONTROLPOINT, +- /* VKD3D_SM5_RT_OUTPUT_CONTROL_POINT */ VKD3DSPR_OUTCONTROLPOINT, +- /* VKD3D_SM5_RT_PATCH_CONSTANT_DATA */ VKD3DSPR_PATCHCONST, +- /* VKD3D_SM5_RT_DOMAIN_LOCATION */ VKD3DSPR_TESSCOORD, +- /* UNKNOWN */ ~0u, +- /* VKD3D_SM5_RT_UAV */ VKD3DSPR_UAV, +- /* VKD3D_SM5_RT_SHARED_MEMORY */ VKD3DSPR_GROUPSHAREDMEM, +- /* VKD3D_SM5_RT_THREAD_ID */ VKD3DSPR_THREADID, +- /* VKD3D_SM5_RT_THREAD_GROUP_ID */ VKD3DSPR_THREADGROUPID, +- /* VKD3D_SM5_RT_LOCAL_THREAD_ID */ VKD3DSPR_LOCALTHREADID, +- /* VKD3D_SM5_RT_COVERAGE */ VKD3DSPR_COVERAGE, +- /* VKD3D_SM5_RT_LOCAL_THREAD_INDEX */ VKD3DSPR_LOCALTHREADINDEX, +- /* VKD3D_SM5_RT_GS_INSTANCE_ID */ VKD3DSPR_GSINSTID, +- /* VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL */ VKD3DSPR_DEPTHOUTGE, +- /* VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL */ VKD3DSPR_DEPTHOUTLE, +- /* VKD3D_SM5_RT_CYCLE_COUNTER */ ~0u, +- /* VKD3D_SM5_RT_OUTPUT_STENCIL_REF */ VKD3DSPR_OUTSTENCILREF, ++struct vkd3d_sm4_register_type_info ++{ ++ enum vkd3d_sm4_register_type sm4_type; ++ enum vkd3d_shader_register_type vkd3d_type; + }; + + static const enum vkd3d_shader_register_precision register_precision_table[] = +@@ -1524,18 +1494,104 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = + /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, + }; + ++struct tpf_writer ++{ ++ struct hlsl_ctx *ctx; ++ struct vkd3d_bytecode_buffer *buffer; ++ struct vkd3d_sm4_lookup_tables lookup; ++}; ++ + static const struct vkd3d_sm4_opcode_info *get_opcode_info(enum vkd3d_sm4_opcode opcode) + { + unsigned int i; + + for (i = 0; i < sizeof(opcode_table) / sizeof(*opcode_table); ++i) + { +- if (opcode == opcode_table[i].opcode) return &opcode_table[i]; ++ if (opcode == opcode_table[i].opcode) ++ return &opcode_table[i]; + } + + return NULL; + } + ++static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) ++{ ++ const struct vkd3d_sm4_register_type_info *info; ++ unsigned int i; ++ ++ static const struct vkd3d_sm4_register_type_info register_type_table[] = ++ { ++ {VKD3D_SM4_RT_TEMP, VKD3DSPR_TEMP}, ++ {VKD3D_SM4_RT_INPUT, VKD3DSPR_INPUT}, ++ {VKD3D_SM4_RT_OUTPUT, VKD3DSPR_OUTPUT}, ++ {VKD3D_SM4_RT_INDEXABLE_TEMP, VKD3DSPR_IDXTEMP}, ++ {VKD3D_SM4_RT_IMMCONST, VKD3DSPR_IMMCONST}, ++ {VKD3D_SM4_RT_IMMCONST64, VKD3DSPR_IMMCONST64}, ++ {VKD3D_SM4_RT_SAMPLER, VKD3DSPR_SAMPLER}, ++ {VKD3D_SM4_RT_RESOURCE, VKD3DSPR_RESOURCE}, ++ {VKD3D_SM4_RT_CONSTBUFFER, VKD3DSPR_CONSTBUFFER}, ++ {VKD3D_SM4_RT_IMMCONSTBUFFER, VKD3DSPR_IMMCONSTBUFFER}, ++ {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID}, ++ {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT}, 
++ {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL}, ++ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER}, ++ {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK}, ++ {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM}, ++ {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY}, ++ {VKD3D_SM5_RT_FUNCTION_POINTER, VKD3DSPR_FUNCTIONPOINTER}, ++ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT_ID, VKD3DSPR_OUTPOINTID}, ++ {VKD3D_SM5_RT_FORK_INSTANCE_ID, VKD3DSPR_FORKINSTID}, ++ {VKD3D_SM5_RT_JOIN_INSTANCE_ID, VKD3DSPR_JOININSTID}, ++ {VKD3D_SM5_RT_INPUT_CONTROL_POINT, VKD3DSPR_INCONTROLPOINT}, ++ {VKD3D_SM5_RT_OUTPUT_CONTROL_POINT, VKD3DSPR_OUTCONTROLPOINT}, ++ {VKD3D_SM5_RT_PATCH_CONSTANT_DATA, VKD3DSPR_PATCHCONST}, ++ {VKD3D_SM5_RT_DOMAIN_LOCATION, VKD3DSPR_TESSCOORD}, ++ {VKD3D_SM5_RT_UAV, VKD3DSPR_UAV}, ++ {VKD3D_SM5_RT_SHARED_MEMORY, VKD3DSPR_GROUPSHAREDMEM}, ++ {VKD3D_SM5_RT_THREAD_ID, VKD3DSPR_THREADID}, ++ {VKD3D_SM5_RT_THREAD_GROUP_ID, VKD3DSPR_THREADGROUPID}, ++ {VKD3D_SM5_RT_LOCAL_THREAD_ID, VKD3DSPR_LOCALTHREADID}, ++ {VKD3D_SM5_RT_COVERAGE, VKD3DSPR_COVERAGE}, ++ {VKD3D_SM5_RT_LOCAL_THREAD_INDEX, VKD3DSPR_LOCALTHREADINDEX}, ++ {VKD3D_SM5_RT_GS_INSTANCE_ID, VKD3DSPR_GSINSTID}, ++ {VKD3D_SM5_RT_DEPTHOUT_GREATER_EQUAL, VKD3DSPR_DEPTHOUTGE}, ++ {VKD3D_SM5_RT_DEPTHOUT_LESS_EQUAL, VKD3DSPR_DEPTHOUTLE}, ++ {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF}, ++ }; ++ ++ memset(lookup, 0, sizeof(*lookup)); ++ ++ for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) ++ { ++ info = ®ister_type_table[i]; ++ lookup->register_type_info_from_sm4[info->sm4_type] = info; ++ lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; ++ } ++} ++ ++static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++{ ++ tpf->ctx = ctx; ++ tpf->buffer = buffer; ++ init_sm4_lookup_tables(&tpf->lookup); ++} ++ ++static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) ++{ ++ if (sm4_type >= VKD3D_SM4_REGISTER_TYPE_COUNT) ++ return NULL; ++ return lookup->register_type_info_from_sm4[sm4_type]; ++} ++ ++static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_type( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) ++{ ++ if (vkd3d_type >= VKD3DSPR_COUNT) ++ return NULL; ++ return lookup->register_type_info_from_vkd3d[vkd3d_type]; ++} ++ + static void map_register(const struct vkd3d_shader_sm4_parser *sm4, struct vkd3d_shader_register *reg) + { + switch (sm4->p.shader_version.type) +@@ -1642,6 +1698,7 @@ static bool sm4_register_is_descriptor(enum vkd3d_sm4_register_type register_typ + static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const uint32_t **ptr, const uint32_t *end, + enum vkd3d_data_type data_type, struct vkd3d_shader_register *param, enum vkd3d_shader_src_modifier *modifier) + { ++ const struct vkd3d_sm4_register_type_info *register_type_info; + enum vkd3d_sm4_register_precision precision; + enum vkd3d_sm4_register_type register_type; + enum vkd3d_sm4_extended_operand_type type; +@@ -1656,15 +1713,15 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui + token = *(*ptr)++; + + register_type = (token & VKD3D_SM4_REGISTER_TYPE_MASK) >> VKD3D_SM4_REGISTER_TYPE_SHIFT; +- if (register_type >= ARRAY_SIZE(register_type_table) +- || register_type_table[register_type] == VKD3DSPR_INVALID) ++ register_type_info = get_info_from_sm4_register_type(&priv->lookup, register_type); 
++ if (!register_type_info) + { + FIXME("Unhandled register type %#x.\n", register_type); + param->type = VKD3DSPR_TEMP; + } + else + { +- param->type = register_type_table[register_type]; ++ param->type = register_type_info->vkd3d_type; + } + param->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT; + param->non_uniform = false; +@@ -2364,6 +2421,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t + sm4->output_map[e->register_index] = e->semantic_index; + } + ++ init_sm4_lookup_tables(&sm4->lookup); ++ + return true; + } + +@@ -2502,7 +2561,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi + return sm4->p.failed ? VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; + } + +-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block); ++static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); + + static bool type_is_integer(const struct hlsl_type *type) + { +@@ -2519,7 +2578,7 @@ static bool type_is_integer(const struct hlsl_type *type) + } + + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, unsigned int *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) ++ bool output, enum vkd3d_shader_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx) + { + unsigned int i; + +@@ -2529,24 +2588,24 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + bool output; + enum vkd3d_shader_type shader_type; + enum vkd3d_sm4_swizzle_type swizzle_type; +- enum vkd3d_sm4_register_type type; ++ enum vkd3d_shader_register_type type; + bool has_idx; + } + register_table[] = + { +- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false}, +- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false}, +- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false}, ++ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADID, false}, ++ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_THREADGROUPID, false}, ++ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_LOCALTHREADID, false}, + +- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3DSPR_PRIMID, false}, + + /* Put sv_target in this table, instead of letting it fall through to + * default varying allocation, so that the register index matches the + * usage index. 
*/ +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true}, ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_DEPTHOUT, false}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3DSPR_OUTPUT, true}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) +@@ -2555,7 +2614,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + && output == register_table[i].output + && ctx->profile->type == register_table[i].shader_type) + { +- *type = register_table[i].type; ++ if (type) ++ *type = register_table[i].type; + if (swizzle_type) + *swizzle_type = register_table[i].swizzle_type; + *has_idx = register_table[i].has_idx; +@@ -2656,7 +2716,6 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; +- enum vkd3d_sm4_register_type type; + uint32_t usage_idx, reg_idx; + D3D_NAME usage; + bool has_idx; +@@ -2670,14 +2729,13 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + continue; + usage_idx = var->semantic.index; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx)) ++ if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, NULL, &has_idx)) + { + reg_idx = has_idx ? 
var->semantic.index : ~0u; + } + else + { + assert(var->regs[HLSL_REGSET_NUMERIC].allocated); +- type = VKD3D_SM4_RT_INPUT; + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; + } + +@@ -3061,7 +3119,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + regset = hlsl_type_get_regset(component_type); + regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, regset, k); + +- if (regset_offset > var->regs[regset].bind_count) ++ if (regset_offset > var->regs[regset].allocation_size) + continue; + + if (var->objects_usage[regset][regset_offset].used) +@@ -3134,7 +3192,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; +- extern_resources[*count].bind_count = var->regs[regset].bind_count; ++ extern_resources[*count].bind_count = var->bind_count[regset]; + + ++*count; + } +@@ -3435,8 +3493,8 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod + + struct sm4_register + { +- enum vkd3d_sm4_register_type type; +- uint32_t idx[2]; ++ enum vkd3d_shader_register_type type; ++ struct vkd3d_shader_register_index idx[2]; + unsigned int idx_count; + enum vkd3d_sm4_dimension dim; + uint32_t immconst_uint[4]; +@@ -3484,36 +3542,36 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + + if (regset == HLSL_REGSET_TEXTURES) + { +- reg->type = VKD3D_SM4_RT_RESOURCE; ++ reg->type = VKD3DSPR_RESOURCE; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_TEXTURES); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { +- reg->type = VKD3D_SM5_RT_UAV; ++ reg->type = VKD3DSPR_UAV; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id; +- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_UAVS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { +- reg->type = VKD3D_SM4_RT_SAMPLER; ++ reg->type = VKD3DSPR_SAMPLER; + reg->dim = VKD3D_SM4_DIMENSION_NONE; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx[0] += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + assert(regset == HLSL_REGSET_SAMPLERS); + reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; +@@ -3523,12 +3581,12 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + + assert(data_type->class <= HLSL_CLASS_VECTOR); +- reg->type = VKD3D_SM4_RT_CONSTBUFFER; ++ reg->type = VKD3DSPR_CONSTBUFFER; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = var->buffer->reg.id; +- reg->idx[1] 
= offset / 4; ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } +@@ -3543,7 +3601,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + + if (has_idx) + { +- reg->idx[0] = var->semantic.index + offset / 4; ++ reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + +@@ -3555,11 +3613,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_INPUT; ++ reg->type = VKD3DSPR_INPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +@@ -3574,11 +3632,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + + if (has_idx) + { +- reg->idx[0] = var->semantic.index + offset / 4; ++ reg->idx[0].offset = var->semantic.index + offset / 4; + reg->idx_count = 1; + } + +- if (reg->type == VKD3D_SM4_RT_DEPTHOUT) ++ if (reg->type == VKD3DSPR_DEPTHOUT) + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + else + reg->dim = VKD3D_SM4_DIMENSION_VEC4; +@@ -3589,9 +3647,9 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_OUTPUT; ++ reg->type = VKD3DSPR_OUTPUT; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; +- reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +@@ -3601,11 +3659,11 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *r + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + assert(hlsl_reg.allocated); +- reg->type = VKD3D_SM4_RT_TEMP; ++ reg->type = VKD3DSPR_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + if (swizzle_type) + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = hlsl_reg.id; ++ reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; + *writemask = hlsl_reg.writemask; + } +@@ -3625,10 +3683,10 @@ static void sm4_register_from_node(struct sm4_register *reg, unsigned int *write + enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr) + { + assert(instr->reg.allocated); +- reg->type = VKD3D_SM4_RT_TEMP; ++ reg->type = VKD3DSPR_TEMP; + reg->dim = VKD3D_SM4_DIMENSION_VEC4; + *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4; +- reg->idx[0] = instr->reg.id; ++ reg->idx[0].offset = instr->reg.id; + reg->idx_count = 1; + *writemask = instr->reg.writemask; + } +@@ -3644,7 +3702,7 @@ static void sm4_src_from_constant_value(struct sm4_src_register *src, + const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) + { + src->swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- src->reg.type = VKD3D_SM4_RT_IMMCONST; ++ src->reg.type = VKD3DSPR_IMMCONST; + if (width == 1) + { + src->reg.dim = VKD3D_SM4_DIMENSION_SCALAR; +@@ -3681,17 +3739,100 @@ static void sm4_src_from_node(struct sm4_src_register *src, + src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); + } + +-static uint32_t sm4_encode_register(const struct sm4_register *reg) ++static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct sm4_dst_register *dst) ++{ ++ const struct 
vkd3d_sm4_register_type_info *register_type_info; ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; ++ uint32_t sm4_reg_type, reg_dim; ++ uint32_t token = 0; ++ unsigned int j; ++ ++ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, dst->reg.type); ++ if (!register_type_info) ++ { ++ FIXME("Unhandled vkd3d-shader register type %#x.\n", dst->reg.type); ++ sm4_reg_type = VKD3D_SM4_RT_TEMP; ++ } ++ else ++ { ++ sm4_reg_type = register_type_info->sm4_type; ++ } ++ ++ reg_dim = dst->reg.dim; ++ ++ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; ++ token |= dst->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ token |= dst->writemask << VKD3D_SM4_WRITEMASK_SHIFT; ++ put_u32(buffer, token); ++ ++ for (j = 0; j < dst->reg.idx_count; ++j) ++ { ++ put_u32(buffer, dst->reg.idx[j].offset); ++ assert(!dst->reg.idx[j].rel_addr); ++ } ++} ++ ++static void sm4_write_src_register(const struct tpf_writer *tpf, const struct sm4_src_register *src) + { +- return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT) +- | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT) +- | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT); ++ const struct vkd3d_sm4_register_type_info *register_type_info; ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; ++ uint32_t sm4_reg_type, reg_dim; ++ uint32_t token = 0; ++ unsigned int j; ++ ++ register_type_info = get_info_from_vkd3d_register_type(&tpf->lookup, src->reg.type); ++ if (!register_type_info) ++ { ++ FIXME("Unhandled vkd3d-shader register type %#x.\n", src->reg.type); ++ sm4_reg_type = VKD3D_SM4_RT_TEMP; ++ } ++ else ++ { ++ sm4_reg_type = register_type_info->sm4_type; ++ } ++ ++ reg_dim = src->reg.dim; ++ ++ token |= sm4_reg_type << VKD3D_SM4_REGISTER_TYPE_SHIFT; ++ token |= src->reg.idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT; ++ token |= reg_dim << VKD3D_SM4_DIMENSION_SHIFT; ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ { ++ token |= (uint32_t)src->swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; ++ token |= src->swizzle << VKD3D_SM4_SWIZZLE_SHIFT; ++ } ++ if (src->reg.mod) ++ token |= VKD3D_SM4_EXTENDED_OPERAND; ++ put_u32(buffer, token); ++ ++ if (src->reg.mod) ++ put_u32(buffer, (src->reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) ++ | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); ++ ++ for (j = 0; j < src->reg.idx_count; ++j) ++ { ++ put_u32(buffer, src->reg.idx[j].offset); ++ assert(!src->reg.idx[j].rel_addr); ++ } ++ ++ if (src->reg.type == VKD3DSPR_IMMCONST) ++ { ++ put_u32(buffer, src->reg.immconst_uint[0]); ++ if (reg_dim == VKD3D_SM4_DIMENSION_VEC4) ++ { ++ put_u32(buffer, src->reg.immconst_uint[1]); ++ put_u32(buffer, src->reg.immconst_uint[2]); ++ put_u32(buffer, src->reg.immconst_uint[3]); ++ } ++ } + } + + static uint32_t sm4_register_order(const struct sm4_register *reg) + { + uint32_t order = 1; +- if (reg->type == VKD3D_SM4_RT_IMMCONST) ++ if (reg->type == VKD3DSPR_IMMCONST) + order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 
4 : 1; + order += reg->idx_count; + if (reg->mod) +@@ -3699,8 +3840,9 @@ static uint32_t sm4_register_order(const struct sm4_register *reg) + return order; + } + +-static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr) ++static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4_instruction *instr) + { ++ struct vkd3d_bytecode_buffer *buffer = tpf->buffer; + uint32_t token = instr->opcode; + unsigned int size = 1, i, j; + +@@ -3728,43 +3870,10 @@ static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const st + } + + for (i = 0; i < instr->dst_count; ++i) +- { +- token = sm4_encode_register(&instr->dsts[i].reg); +- if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) +- token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT; +- put_u32(buffer, token); +- +- for (j = 0; j < instr->dsts[i].reg.idx_count; ++j) +- put_u32(buffer, instr->dsts[i].reg.idx[j]); +- } ++ sm4_write_dst_register(tpf, &instr->dsts[i]); + + for (i = 0; i < instr->src_count; ++i) +- { +- token = sm4_encode_register(&instr->srcs[i].reg); +- token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT; +- token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT; +- if (instr->srcs[i].reg.mod) +- token |= VKD3D_SM4_EXTENDED_OPERAND; +- put_u32(buffer, token); +- +- if (instr->srcs[i].reg.mod) +- put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT) +- | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER); +- +- for (j = 0; j < instr->srcs[i].reg.idx_count; ++j) +- put_u32(buffer, instr->srcs[i].reg.idx[j]); +- +- if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST) +- { +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]); +- if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4) +- { +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]); +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]); +- put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]); +- } +- } +- } ++ sm4_write_src_register(tpf, &instr->srcs[i]); + + if (instr->byte_stride) + put_u32(buffer, instr->byte_stride); +@@ -3800,25 +3909,25 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + return true; + } + +-static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer) ++static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) + { + const struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4, +- .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER, +- .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4}, ++ .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, ++ .srcs[0].reg.idx[0].offset = cbuffer->reg.id, ++ .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, + .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4, + .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W), + .src_count = 1, + }; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct extern_resource *resource) ++static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) + { + struct hlsl_type *component_type; + unsigned int i; +@@ -3826,12 +3935,12 @@ static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + { + .opcode = 
VKD3D_SM4_OP_DCL_SAMPLER, + +- .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER, ++ .dsts[0].reg.type = VKD3DSPR_SAMPLER, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + +- component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); ++ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.opcode |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; +@@ -3843,13 +3952,13 @@ static void write_sm4_dcl_samplers(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + +- instr.dsts[0].reg.idx[0] = resource->id + i; +- write_sm4_instruction(buffer, &instr); ++ instr.dsts[0].reg.idx[0].offset = resource->id + i; ++ write_sm4_instruction(tpf, &instr); + } + } + +-static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct extern_resource *resource, bool uav) ++static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct extern_resource *resource, ++ bool uav) + { + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + struct hlsl_type *component_type; +@@ -3858,7 +3967,7 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + + assert(resource->regset == regset); + +- component_type = hlsl_type_get_component_type(ctx, resource->data_type, 0); ++ component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + + for (i = 0; i < resource->bind_count; ++i) + { +@@ -3867,8 +3976,8 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + + instr = (struct sm4_instruction) + { +- .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE, +- .dsts[0].reg.idx = {resource->id + i}, ++ .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, ++ .dsts[0].reg.idx[0].offset = resource->id + i, + .dsts[0].reg.idx_count = 1, + .dst_count = 1, + +@@ -3901,13 +4010,13 @@ static void write_sm4_dcl_textures(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + instr.opcode |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + } + +-static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var) ++static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) + { +- const struct hlsl_profile_info *profile = ctx->profile; ++ const struct hlsl_profile_info *profile = tpf->ctx->profile; + const bool output = var->is_output_semantic; + D3D_NAME usage; + bool has_idx; +@@ -3918,11 +4027,11 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + .dst_count = 1, + }; + +- if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) ++ if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx)) + { + if (has_idx) + { +- instr.dsts[0].reg.idx[0] = var->semantic.index; ++ instr.dsts[0].reg.idx[0].offset = var->semantic.index; + instr.dsts[0].reg.idx_count = 1; + } + else +@@ -3933,16 +4042,16 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + } + else + { +- instr.dsts[0].reg.type = output ? 
VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT; +- instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id; ++ instr.dsts[0].reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + instr.dsts[0].reg.idx_count = 1; + instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + +- if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT) ++ if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) + instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + +- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); ++ hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); + if (usage == ~0u) + usage = D3D_NAME_UNDEFINED; + +@@ -4002,10 +4111,10 @@ static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + break; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count) ++static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) + { + struct sm4_instruction instr = + { +@@ -4015,33 +4124,35 @@ static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t t + .idx_count = 1, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3]) ++static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) + { + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + +- .idx = {thread_count[0], thread_count[1], thread_count[2]}, ++ .idx[0] = thread_count[0], ++ .idx[1] = thread_count[1], ++ .idx[2] = thread_count[2], + .idx_count = 3, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer) ++static void write_sm4_ret(const struct tpf_writer *tpf) + { + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_RET, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod) + { + struct sm4_instruction instr; +@@ -4056,12 +4167,11 @@ static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_ + instr.srcs[0].reg.mod = src_mod; + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, +- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, +- const struct hlsl_ir_node *src) ++static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct hlsl_ir_node *dst, unsigned dst_idx, const struct hlsl_ir_node *src) + { + struct sm4_instruction instr; + +@@ -4071,7 +4181,7 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); +- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; ++ instr.dsts[1 - 
dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; +@@ -4079,10 +4189,10 @@ static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffe + sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -4097,11 +4207,11 @@ static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + + /* dp# instructions don't map the swizzle. */ +-static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode, ++static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, + const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { + struct sm4_instruction instr; +@@ -4116,10 +4226,10 @@ static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum v + sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer, ++static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, + enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned dst_idx, + const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) + { +@@ -4131,7 +4241,7 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff + assert(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + assert(1 - dst_idx >= 0); +- instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL; ++ instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE; + instr.dsts[1 - dst_idx].reg.idx_count = 0; + instr.dst_count = 2; +@@ -4140,15 +4250,15 @@ static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buff + sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, +- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, +- const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) ++static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++ const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, ++ const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, ++ enum hlsl_sampler_dim dim) + { +- const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, resource); ++ const struct hlsl_type *resource_type = 
hlsl_deref_get_type(tpf->ctx, resource); + bool multisampled = resource_type->base_type == HLSL_TYPE_TEXTURE + && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); + bool uav = (hlsl_type_get_regset(resource_type) == HLSL_REGSET_UAVS); +@@ -4165,7 +4275,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } +@@ -4188,7 +4298,7 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + + sm4_src_from_node(&instr.srcs[0], coords, coords_writemask); + +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + + instr.src_count = 2; + +@@ -4203,13 +4313,13 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + + memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); + instr.srcs[2].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- reg->type = VKD3D_SM4_RT_IMMCONST; ++ reg->type = VKD3DSPR_IMMCONST; + reg->dim = VKD3D_SM4_DIMENSION_SCALAR; + reg->immconst_uint[0] = index->value.u[0].u; + } +- else if (ctx->profile->major_version == 4 && ctx->profile->minor_version == 0) ++ else if (tpf->ctx->profile->major_version == 4 && tpf->ctx->profile->minor_version == 0) + { +- hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); ++ hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); + } + else + { +@@ -4219,11 +4329,10 @@ static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + ++instr.src_count; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_resource_load *load) ++static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *coords = load->coords.node; +@@ -4267,7 +4376,7 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7."); + return; + } +@@ -4277,8 +4386,8 @@ static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); +- sm4_src_from_deref(ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 3; + + if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD +@@ -4300,11 +4409,10 @@ static void 
write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + ++instr.src_count; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_sampleinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_resource_load *load) ++static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; +@@ -4320,14 +4428,13 @@ static void write_sm4_sampleinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + +- sm4_src_from_deref(ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], resource, instr.dsts[0].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_resource_load *load) ++static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_deref *resource = &load->resource; + const struct hlsl_ir_node *dst = &load->node; +@@ -4344,10 +4451,10 @@ static void write_sm4_resinfo(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[1], resource, instr.dsts[0].writemask); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + + static bool type_is_float(const struct hlsl_type *type) +@@ -4355,8 +4462,7 @@ static bool type_is_float(const struct hlsl_type *type) + return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF; + } + +-static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr, ++static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, + const struct hlsl_ir_node *arg, uint32_t mask) + { + struct sm4_instruction instr; +@@ -4369,16 +4475,15 @@ static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx, + + sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask); + instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; +- instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST; ++ instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; + instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR; + instr.srcs[1].reg.immconst_uint[0] = mask; + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_cast(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr) ++static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + static const union + { +@@ -4400,23 +4505,23 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, 
arg1, 0); + break; + + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u); ++ write_sm4_cast_from_bool(tpf, expr, arg1, one.u); + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); + break; + + default: +@@ -4429,20 +4534,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); ++ write_sm4_cast_from_bool(tpf, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); + break; + + default: +@@ -4455,20 +4560,20 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + { + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1); ++ write_sm4_cast_from_bool(tpf, expr, arg1, 1); + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); + break; + + default: +@@ -4477,7 +4582,7 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + break; + + case HLSL_TYPE_DOUBLE: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); + break; + + case HLSL_TYPE_BOOL: +@@ -4487,26 +4592,25 @@ static void write_sm4_cast(struct hlsl_ctx *ctx, + } + } + +-static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) ++static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, ++ const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) + { + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; + +- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); ++ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); + sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); + instr.src_count = 2; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_expr(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct 
hlsl_ir_expr *expr) ++static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_ir_node *arg2 = expr->operands[1].node; +@@ -4515,7 +4619,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + + assert(expr->node.reg.allocated); + +- if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type))) ++ if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) + return; + + switch (expr->op) +@@ -4524,181 +4628,181 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_BIT_NOT: + assert(type_is_integer(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_CAST: +- write_sm4_cast(ctx, buffer, expr); ++ write_sm4_cast(tpf, expr); + break; + + case HLSL_OP1_COS: + assert(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); ++ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_COARSE: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_FINE: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_COARSE: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_FINE: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FLOOR: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, 
VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: + assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_NEG: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP1_REINTERPRET: +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_RSQ: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP1_SIN: + assert(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); ++ write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + + case HLSL_OP1_SQRT: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_TRUNC: + assert(type_is_float(dst_type)); +- write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); ++ write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + break; + + case HLSL_OP2_ADD: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); + } + break; + + case HLSL_OP2_BIT_AND: + assert(type_is_integer(dst_type)); +- 
write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_OR: + assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + assert(type_is_integer(dst_type)); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_DIV: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); + } + break; + +@@ -4709,15 +4813,15 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (arg1->data_type->dimx) + { + case 4: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); + break; + + case 3: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); + break; + + case 2: +- write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); ++ write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); + break; + + case 1: +@@ -4727,7 +4831,7 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); + } + break; + +@@ -4740,18 +4844,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4766,21 +4870,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, 
VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4795,21 +4899,21 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4817,37 +4921,37 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + + case HLSL_OP2_LOGIC_AND: + assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: + assert(dst_type->base_type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_MAX: + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); + } + break; + +@@ -4855,19 +4959,19 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, 
arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); + } + break; + +@@ -4875,11 +4979,11 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); + } + break; + +@@ -4887,18 +4991,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (dst_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + /* Using IMUL instead of UMUL because we're taking the low + * bits, and the native compiler generates IMUL. */ +- write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); ++ write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); + } + break; + +@@ -4911,18 +5015,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + switch (src_type->base_type) + { + case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); + break; + + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); ++ write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", +- debug_hlsl_type(ctx, src_type)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", ++ debug_hlsl_type(tpf->ctx, src_type)); + break; + } + break; +@@ -4931,18 +5035,18 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, + case HLSL_OP2_RSHIFT: + assert(type_is_integer(dst_type)); + assert(dst_type->base_type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, ++ write_sm4_binary_op(tpf, dst_type->base_type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; + + default: +- hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); + } + +- hlsl_release_string_buffer(ctx, dst_type_string); ++ hlsl_release_string_buffer(tpf->ctx, dst_type_string); + } + +-static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff) ++static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) + { + struct sm4_instruction instr = + { +@@ -4953,26 +5057,25 @@ static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + assert(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + +- write_sm4_block(ctx, buffer, &iff->then_block); ++ write_sm4_block(tpf, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + instr.opcode = VKD3D_SM4_OP_ELSE; + instr.src_count = 0; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + +- write_sm4_block(ctx, buffer, &iff->else_block); ++ write_sm4_block(tpf, &iff->else_block); + } + + instr.opcode = VKD3D_SM4_OP_ENDIF; + instr.src_count = 0; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_jump(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump) ++static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_jump *jump) + { + struct sm4_instruction instr = {0}; + +@@ -4996,11 +5099,11 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, + vkd3d_unreachable(); + + default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + return; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + + /* Does this variable's data come directly from the API user, rather than being +@@ -5014,8 +5117,7 @@ static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *va + return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; + } + +-static void write_sm4_load(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load) ++static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) + { + const struct hlsl_type *type = load->node.data_type; + struct sm4_instruction instr; +@@ -5026,7 +5128,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, + instr.dst_count = 1; + + assert(type->class <= HLSL_CLASS_LAST_NUMERIC); +- if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(ctx, load->src.var)) ++ if (type->base_type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + { + struct hlsl_constant_value value; + +@@ -5035,7 +5137,7 @@ static void write_sm4_load(struct hlsl_ctx *ctx, + + instr.opcode = VKD3D_SM4_OP_MOVC; + +- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); + + memset(&value, 0xff, sizeof(value)); + sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].writemask); +@@ -5047,32 +5149,31 @@ static void 
write_sm4_load(struct hlsl_ctx *ctx, + { + instr.opcode = VKD3D_SM4_OP_MOV; + +- sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[0], &load->src, instr.dsts[0].writemask); + instr.src_count = 1; + } + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_loop(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) + { + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_LOOP, + }; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + +- write_sm4_block(ctx, buffer, &loop->body); ++ write_sm4_block(tpf, &loop->body); + + instr.opcode = VKD3D_SM4_OP_ENDLOOP; +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *dst, const struct hlsl_deref *resource, const struct hlsl_deref *sampler, +- const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset) ++static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst, ++ const struct hlsl_deref *resource, const struct hlsl_deref *sampler, const struct hlsl_ir_node *coords, ++ unsigned int swizzle, const struct hlsl_ir_node *texel_offset) + { + struct sm4_src_register *src; + struct sm4_instruction instr; +@@ -5090,9 +5191,9 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) + { +- if (ctx->profile->major_version < 5) ++ if (tpf->ctx->profile->major_version < 5) + { +- hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, + "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); + return; + } +@@ -5101,19 +5202,18 @@ static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + } + } + +- sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); ++ sm4_src_from_deref(tpf->ctx, &instr.srcs[instr.src_count++], resource, instr.dsts[0].writemask); + + src = &instr.srcs[instr.src_count++]; +- sm4_src_from_deref(ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf->ctx, src, sampler, VKD3DSP_WRITEMASK_ALL); + src->reg.dim = VKD3D_SM4_DIMENSION_VEC4; + src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR; + src->swizzle = swizzle; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_resource_load(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load) ++static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) + { + const struct hlsl_ir_node *texel_offset = load->texel_offset.node; + const struct hlsl_ir_node *sample_index = load->sample_index.node; +@@ -5121,20 +5221,20 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, + + if (load->sampler.var && !load->sampler.var->is_uniform) + { +- hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); ++ hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); + return; + } + + if 
(!load->resource.var->is_uniform) + { +- hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); ++ hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); + return; + } + + switch (load->load_type) + { + case HLSL_RESOURCE_LOAD: +- write_sm4_ld(ctx, buffer, &load->node, &load->resource, ++ write_sm4_ld(tpf, &load->node, &load->resource, + coords, sample_index, texel_offset, load->sampling_dim); + break; + +@@ -5146,61 +5246,59 @@ static void write_sm4_resource_load(struct hlsl_ctx *ctx, + case HLSL_RESOURCE_SAMPLE_GRAD: + /* Combined sample expressions were lowered. */ + assert(load->sampler.var); +- write_sm4_sample(ctx, buffer, load); ++ write_sm4_sample(tpf, load); + break; + + case HLSL_RESOURCE_GATHER_RED: +- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(X, X, X, X), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_GREEN: +- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_BLUE: +- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset); + break; + + case HLSL_RESOURCE_GATHER_ALPHA: +- write_sm4_gather(ctx, buffer, &load->node, &load->resource, &load->sampler, coords, ++ write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, + HLSL_SWIZZLE(W, W, W, W), texel_offset); + break; + + case HLSL_RESOURCE_SAMPLE_INFO: +- write_sm4_sampleinfo(ctx, buffer, load); ++ write_sm4_sampleinfo(tpf, load); + break; + + case HLSL_RESOURCE_RESINFO: +- write_sm4_resinfo(ctx, buffer, load); ++ write_sm4_resinfo(tpf, load); + break; + } + } + +-static void write_sm4_resource_store(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store) ++static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) + { +- struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); ++ struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); + + if (!store->resource.var->is_uniform) + { +- hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); ++ hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); + return; + } + + if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { +- hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); ++ hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); + return; + } + +- write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node); ++ write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); + } + +-static void write_sm4_store(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store) ++static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) + { + const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm4_instruction instr; +@@ -5209,18 +5307,17 @@ static void write_sm4_store(struct hlsl_ctx *ctx, + memset(&instr, 0, sizeof(instr)); + 
instr.opcode = VKD3D_SM4_OP_MOV; + +- sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); ++ sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs); + instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask); + instr.dst_count = 1; + + sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_swizzle(struct hlsl_ctx *ctx, +- struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle) ++static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) + { + struct sm4_instruction instr; + unsigned int writemask; +@@ -5236,11 +5333,10 @@ static void write_sm4_swizzle(struct hlsl_ctx *ctx, + swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask); + instr.src_count = 1; + +- write_sm4_instruction(buffer, &instr); ++ write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block) ++static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) + { + const struct hlsl_ir_node *instr; + +@@ -5250,12 +5346,12 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + { + if (instr->data_type->class == HLSL_CLASS_MATRIX) + { +- hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Matrix operations need to be lowered."); + break; + } + else if (instr->data_type->class == HLSL_CLASS_OBJECT) + { +- hlsl_fixme(ctx, &instr->loc, "Object copy."); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Object copy."); + break; + } + +@@ -5275,43 +5371,43 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + vkd3d_unreachable(); + + case HLSL_IR_EXPR: +- write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr)); ++ write_sm4_expr(tpf, hlsl_ir_expr(instr)); + break; + + case HLSL_IR_IF: +- write_sm4_if(ctx, buffer, hlsl_ir_if(instr)); ++ write_sm4_if(tpf, hlsl_ir_if(instr)); + break; + + case HLSL_IR_JUMP: +- write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr)); ++ write_sm4_jump(tpf, hlsl_ir_jump(instr)); + break; + + case HLSL_IR_LOAD: +- write_sm4_load(ctx, buffer, hlsl_ir_load(instr)); ++ write_sm4_load(tpf, hlsl_ir_load(instr)); + break; + + case HLSL_IR_RESOURCE_LOAD: +- write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr)); ++ write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); + break; + + case HLSL_IR_RESOURCE_STORE: +- write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr)); ++ write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); + break; + + case HLSL_IR_LOOP: +- write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr)); ++ write_sm4_loop(tpf, hlsl_ir_loop(instr)); + break; + + case HLSL_IR_STORE: +- write_sm4_store(ctx, buffer, hlsl_ir_store(instr)); ++ write_sm4_store(tpf, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWIZZLE: +- write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr)); ++ write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); + break; + + default: +- hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); ++ hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + } + } + } +@@ -5326,6 +5422,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct hlsl_buffer 
*cbuffer; + const struct hlsl_ir_var *var; + size_t token_count_position; ++ struct tpf_writer tpf; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { +@@ -5340,6 +5437,8 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + VKD3D_SM4_LIB, + }; + ++ tpf_writer_init(&tpf, ctx, &buffer); ++ + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + + put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version, shader_types[profile->type])); +@@ -5348,7 +5447,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- write_sm4_dcl_constant_buffer(&buffer, cbuffer); ++ write_sm4_dcl_constant_buffer(&tpf, cbuffer); + } + + for (i = 0; i < extern_resources_count; ++i) +@@ -5356,28 +5455,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + const struct extern_resource *resource = &extern_resources[i]; + + if (resource->regset == HLSL_REGSET_SAMPLERS) +- write_sm4_dcl_samplers(ctx, &buffer, resource); ++ write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +- write_sm4_dcl_textures(ctx, &buffer, resource, false); ++ write_sm4_dcl_textures(&tpf, resource, false); + else if (resource->regset == HLSL_REGSET_UAVS) +- write_sm4_dcl_textures(ctx, &buffer, resource, true); ++ write_sm4_dcl_textures(&tpf, resource, true); + } + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) +- write_sm4_dcl_semantic(ctx, &buffer, var); ++ write_sm4_dcl_semantic(&tpf, var); + } + + if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) +- write_sm4_dcl_thread_group(&buffer, ctx->thread_count); ++ write_sm4_dcl_thread_group(&tpf, ctx->thread_count); + + if (ctx->temp_count) +- write_sm4_dcl_temps(&buffer, ctx->temp_count); ++ write_sm4_dcl_temps(&tpf, ctx->temp_count); + +- write_sm4_block(ctx, &buffer, &entry_func->body); ++ write_sm4_block(&tpf, &entry_func->body); + +- write_sm4_ret(&buffer); ++ write_sm4_ret(&tpf); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 512d9ea41e7..c777bad2206 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -579,7 +579,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig + + struct vkd3d_shader_scan_context + { +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info; + size_t descriptors_size; + + struct vkd3d_shader_message_context *message_context; +@@ -599,20 +599,12 @@ struct vkd3d_shader_scan_context + size_t cf_info_size; + size_t cf_info_count; + +- struct +- { +- unsigned int id; +- unsigned int descriptor_idx; +- } *uav_ranges; +- size_t uav_ranges_size; +- size_t uav_range_count; +- + enum vkd3d_shader_api_version api_version; + }; + + static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context) + { + unsigned int i; +@@ -635,7 +627,6 
@@ static void vkd3d_shader_scan_context_init(struct vkd3d_shader_scan_context *con + + static void vkd3d_shader_scan_context_cleanup(struct vkd3d_shader_scan_context *context) + { +- vkd3d_free(context->uav_ranges); + vkd3d_free(context->cf_info); + } + +@@ -703,18 +694,23 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_find_innermost_loop_cf_inf + return NULL; + } + +-static struct vkd3d_shader_descriptor_info *vkd3d_shader_scan_get_uav_descriptor_info( +- const struct vkd3d_shader_scan_context *context, unsigned int range_id) ++static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_context *context, ++ const struct vkd3d_shader_register *reg, uint32_t flag) + { ++ unsigned int range_id = reg->idx[0].offset; + unsigned int i; + +- for (i = 0; i < context->uav_range_count; ++i) ++ if (!context->scan_descriptor_info) ++ return; ++ ++ for (i = 0; i < context->scan_descriptor_info->descriptor_count; ++i) + { +- if (context->uav_ranges[i].id == range_id) +- return &context->scan_descriptor_info->descriptors[context->uav_ranges[i].descriptor_idx]; ++ if (context->scan_descriptor_info->descriptors[i].register_id == range_id) ++ { ++ context->scan_descriptor_info->descriptors[i].flags |= flag; ++ break; ++ } + } +- +- return NULL; + } + + static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) +@@ -730,13 +726,7 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr + static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + } + + static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) +@@ -749,13 +739,7 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in + static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER); + } + + static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) +@@ -768,22 +752,16 @@ static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_ + static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_register *reg) + { +- struct vkd3d_shader_descriptor_info *d; +- +- if (!context->scan_descriptor_info) +- return; +- +- d = vkd3d_shader_scan_get_uav_descriptor_info(context, reg->idx[0].offset); +- d->flags |= VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS; ++ vkd3d_shader_scan_add_uav_flag(context, reg, VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_ATOMICS); + } + + static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *context, +- enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register_range *range, +- enum 
vkd3d_shader_resource_type resource_type, enum vkd3d_shader_resource_data_type resource_data_type, +- unsigned int flags) ++ enum vkd3d_shader_descriptor_type type, const struct vkd3d_shader_register *reg, ++ const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, ++ enum vkd3d_shader_resource_data_type resource_data_type, unsigned int flags) + { +- struct vkd3d_shader_scan_descriptor_info *info = context->scan_descriptor_info; +- struct vkd3d_shader_descriptor_info *d; ++ struct vkd3d_shader_scan_descriptor_info1 *info = context->scan_descriptor_info; ++ struct vkd3d_shader_descriptor_info1 *d; + + if (!vkd3d_array_reserve((void **)&info->descriptors, &context->descriptors_size, + info->descriptor_count + 1, sizeof(*info->descriptors))) +@@ -794,6 +772,7 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c + + d = &info->descriptors[info->descriptor_count]; + d->type = type; ++ d->register_id = reg->idx[0].offset; + d->register_space = range->space; + d->register_index = range->first; + d->resource_type = resource_type; +@@ -805,23 +784,6 @@ static bool vkd3d_shader_scan_add_descriptor(struct vkd3d_shader_scan_context *c + return true; + } + +-static bool vkd3d_shader_scan_add_uav_range(struct vkd3d_shader_scan_context *context, +- unsigned int id, unsigned int descriptor_idx) +-{ +- if (!vkd3d_array_reserve((void **)&context->uav_ranges, &context->uav_ranges_size, +- context->uav_range_count + 1, sizeof(*context->uav_ranges))) +- { +- ERR("Failed to allocate UAV range.\n"); +- return false; +- } +- +- context->uav_ranges[context->uav_range_count].id = id; +- context->uav_ranges[context->uav_range_count].descriptor_idx = descriptor_idx; +- ++context->uav_range_count; +- +- return true; +-} +- + static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_scan_context *context, + const struct vkd3d_shader_instruction *instruction) + { +@@ -830,7 +792,7 @@ static void vkd3d_shader_scan_constant_buffer_declaration(struct vkd3d_shader_sc + if (!context->scan_descriptor_info) + return; + +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->range, ++ vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, &cb->src.reg, &cb->range, + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); + } + +@@ -847,7 +809,7 @@ static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_conte + flags = VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; + else + flags = 0; +- vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->range, ++ vkd3d_shader_scan_add_descriptor(context, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, &sampler->src.reg, &sampler->range, + VKD3D_SHADER_RESOURCE_NONE, VKD3D_SHADER_RESOURCE_DATA_UINT, flags); + } + +@@ -864,10 +826,8 @@ static void vkd3d_shader_scan_resource_declaration(struct vkd3d_shader_scan_cont + type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + else + type = VKD3D_SHADER_DESCRIPTOR_TYPE_SRV; +- vkd3d_shader_scan_add_descriptor(context, type, &resource->range, resource_type, resource_data_type, 0); +- if (type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) +- vkd3d_shader_scan_add_uav_range(context, resource->reg.reg.idx[0].offset, +- context->scan_descriptor_info->descriptor_count - 1); ++ vkd3d_shader_scan_add_descriptor(context, type, &resource->reg.reg, &resource->range, ++ resource_type, resource_data_type, 0); + } + + static void vkd3d_shader_scan_typed_resource_declaration(struct 
vkd3d_shader_scan_context *context, +@@ -1117,24 +1077,64 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + return VKD3D_OK; + } + ++static enum vkd3d_result convert_descriptor_info(struct vkd3d_shader_scan_descriptor_info *info, ++ const struct vkd3d_shader_scan_descriptor_info1 *info1) ++{ ++ unsigned int i; ++ ++ if (!(info->descriptors = vkd3d_calloc(info1->descriptor_count, sizeof(*info->descriptors)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ for (i = 0; i < info1->descriptor_count; ++i) ++ { ++ const struct vkd3d_shader_descriptor_info1 *src = &info1->descriptors[i]; ++ struct vkd3d_shader_descriptor_info *dst = &info->descriptors[i]; ++ ++ dst->type = src->type; ++ dst->register_space = src->register_space; ++ dst->register_index = src->register_index; ++ dst->resource_type = src->resource_type; ++ dst->resource_data_type = src->resource_data_type; ++ dst->flags = src->flags; ++ dst->count = src->count; ++ } ++ info->descriptor_count = info1->descriptor_count; ++ ++ return VKD3D_OK; ++} ++ ++static void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info) ++{ ++ TRACE("scan_descriptor_info %p.\n", scan_descriptor_info); ++ ++ vkd3d_free(scan_descriptor_info->descriptors); ++} ++ + static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info, +- struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) ++ struct vkd3d_shader_message_context *message_context, ++ struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1, struct vkd3d_shader_parser *parser) + { +- struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; ++ struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; + struct vkd3d_shader_instruction *instruction; + struct vkd3d_shader_scan_context context; + int ret = VKD3D_OK; + unsigned int i; + +- if ((scan_descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO))) ++ descriptor_info = vkd3d_find_struct(compile_info->next, SCAN_DESCRIPTOR_INFO); ++ if (descriptor_info1) ++ { ++ descriptor_info1->descriptors = NULL; ++ descriptor_info1->descriptor_count = 0; ++ } ++ else if (descriptor_info) + { +- scan_descriptor_info->descriptors = NULL; +- scan_descriptor_info->descriptor_count = 0; ++ descriptor_info1 = &local_descriptor_info1; + } + signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); + +- vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); ++ vkd3d_shader_scan_context_init(&context, compile_info, descriptor_info1, message_context); + + if (TRACE_ON()) + { +@@ -1145,19 +1145,16 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + { + instruction = &parser->instructions.elements[i]; + if ((ret = vkd3d_shader_scan_instruction(&context, instruction)) < 0) +- { +- if (scan_descriptor_info) +- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + break; +- } + } + + for (i = 0; i < ARRAY_SIZE(parser->shader_desc.flat_constant_count); ++i) + { + struct vkd3d_shader_register_range range = {.space = 0, .first = i, .last = i}; ++ struct vkd3d_shader_register reg = {.idx[0].offset = i, .idx_count = 1}; + + if (parser->shader_desc.flat_constant_count[i].external) +- vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ++ 
vkd3d_shader_scan_add_descriptor(&context, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, ®, + &range, VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0); + } + +@@ -1169,13 +1166,26 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, + &parser->shader_desc.patch_constant_signature)) + { +- vkd3d_shader_free_scan_signature_info(signature_info); +- if (scan_descriptor_info) +- vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + ret = VKD3D_ERROR_OUT_OF_MEMORY; + } + } + ++ if (!ret && descriptor_info) ++ ret = convert_descriptor_info(descriptor_info, descriptor_info1); ++ ++ if (ret < 0) ++ { ++ if (descriptor_info) ++ vkd3d_shader_free_scan_descriptor_info(descriptor_info); ++ if (descriptor_info1) ++ vkd3d_shader_free_scan_descriptor_info1(descriptor_info1); ++ if (signature_info) ++ vkd3d_shader_free_scan_signature_info(signature_info); ++ } ++ else ++ { ++ vkd3d_shader_free_scan_descriptor_info1(&local_descriptor_info1); ++ } + vkd3d_shader_scan_context_cleanup(&context); + return ret; + } +@@ -1192,7 +1202,7 @@ static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1210,7 +1220,7 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1228,7 +1238,7 @@ static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, + return ret; + } + +- ret = scan_with_parser(compile_info, message_context, parser); ++ ret = scan_with_parser(compile_info, message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +@@ -1287,7 +1297,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { +- struct vkd3d_shader_scan_descriptor_info scan_descriptor_info; ++ struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; + struct vkd3d_glsl_generator *glsl_generator; + struct vkd3d_shader_compile_info scan_info; + int ret; +@@ -1295,11 +1305,8 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + vkd3d_shader_dump_shader(compile_info->source_type, parser->shader_version.type, &compile_info->source); + + scan_info = *compile_info; +- scan_descriptor_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; +- scan_descriptor_info.next = scan_info.next; +- scan_info.next = &scan_descriptor_info; + +- if ((ret = scan_with_parser(&scan_info, message_context, parser)) < 0) ++ if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) + return ret; + + switch (compile_info->target_type) +@@ -1313,7 +1320,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + message_context, &parser->location))) + { + ERR("Failed to create GLSL generator.\n"); +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + return VKD3D_ERROR; + } + +@@ -1331,7 
+1338,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, + assert(0); + } + +- vkd3d_shader_free_scan_descriptor_info(&scan_descriptor_info); ++ vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + return ret; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index dc43175d4b5..c719085e11f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -133,10 +133,13 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE = 5024, + VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL = 5025, + VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, ++ VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, + VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE = 5302, ++ VKD3D_SHADER_WARNING_HLSL_IMAGINARY_NUMERIC_RESULT = 5303, ++ VKD3D_SHADER_WARNING_HLSL_NON_FINITE_RESULT = 5304, + + VKD3D_SHADER_ERROR_GLSL_INTERNAL = 6000, + +@@ -511,6 +514,8 @@ enum vkd3d_shader_register_type + VKD3DSPR_OUTSTENCILREF, + VKD3DSPR_UNDEF, + ++ VKD3DSPR_COUNT, ++ + VKD3DSPR_INVALID = ~0u, + }; + +@@ -1108,6 +1113,24 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse + parser->ops->parser_destroy(parser); + } + ++struct vkd3d_shader_descriptor_info1 ++{ ++ enum vkd3d_shader_descriptor_type type; ++ unsigned int register_space; ++ unsigned int register_index; ++ unsigned int register_id; ++ enum vkd3d_shader_resource_type resource_type; ++ enum vkd3d_shader_resource_data_type resource_data_type; ++ unsigned int flags; ++ unsigned int count; ++}; ++ ++struct vkd3d_shader_scan_descriptor_info1 ++{ ++ struct vkd3d_shader_descriptor_info1 *descriptors; ++ unsigned int descriptor_count; ++}; ++ + void vkd3d_shader_trace(const struct vkd3d_shader_instruction_array *instructions, + const struct vkd3d_shader_version *shader_version); + +@@ -1230,7 +1253,7 @@ void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); + #define SPIRV_MAX_SRC_COUNT 6 + + int spirv_compile(struct vkd3d_shader_parser *parser, +- const struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info, ++ const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +-- +2.40.1 +
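Editor's note, appended after the patch so the diff above stays byte-exact: the scan refactor in vkd3d_shader_main.c replaces the old uav_ranges side table (a separate id -> descriptor_idx mapping) with a register_id field stored directly on each vkd3d_shader_descriptor_info1, so UAV read/counter/atomic flags are now recorded by a linear search over the scanned descriptors. The sketch below is an illustrative, self-contained rendering of that pattern only; the struct, enum, and function names are simplified stand-ins, not vkd3d's actual API.

/* Minimal sketch of recording a usage flag by register ID, assuming a
 * descriptor array populated during an earlier declaration scan. */
#include <stdint.h>
#include <stdio.h>

enum descriptor_flags
{
    DESCRIPTOR_FLAG_UAV_READ    = 0x1,
    DESCRIPTOR_FLAG_UAV_COUNTER = 0x2,
    DESCRIPTOR_FLAG_UAV_ATOMICS = 0x4,
};

struct descriptor_info
{
    unsigned int register_id; /* ID the shader uses to address the UAV. */
    uint32_t flags;           /* Usage flags accumulated during the scan. */
};

/* Search the descriptors for the matching register ID and OR in the flag.
 * This is the lookup the patch performs in place of consulting a separate
 * uav_ranges table; an ID with no matching descriptor is simply ignored. */
static void add_uav_flag(struct descriptor_info *descriptors, unsigned int count,
        unsigned int register_id, uint32_t flag)
{
    unsigned int i;

    for (i = 0; i < count; ++i)
    {
        if (descriptors[i].register_id == register_id)
        {
            descriptors[i].flags |= flag;
            break;
        }
    }
}

int main(void)
{
    struct descriptor_info descriptors[] =
    {
        {.register_id = 0, .flags = 0},
        {.register_id = 3, .flags = 0},
    };

    add_uav_flag(descriptors, 2, 3, DESCRIPTOR_FLAG_UAV_READ);
    add_uav_flag(descriptors, 2, 3, DESCRIPTOR_FLAG_UAV_ATOMICS);
    printf("flags for u3: %#x\n", descriptors[1].flags); /* prints 0x5 */
    return 0;
}

A linear scan suffices here because shaders declare at most a handful of descriptors, which is presumably why the patch drops the dedicated range table rather than keeping an index.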