diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch new file mode 100644 index 00000000..75502584 --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch @@ -0,0 +1,8054 @@ +From cc051cf96ebe8e8d9ee67dd069b6bf1bc33a8257 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 7 Mar 2024 10:40:41 +1100 +Subject: [PATCH] Updated vkd3d to c792114a6a58c7c97abf827d154d7ecd22d81536. + +--- + libs/vkd3d/include/private/vkd3d_common.h | 6 +- + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 25 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 616 ++++++++----- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 99 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 536 +++++++++-- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 6 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 118 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 99 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 7 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 858 ++++++++++++++---- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 689 ++++++++++++-- + libs/vkd3d/libs/vkd3d-shader/ir.c | 175 ++-- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + + libs/vkd3d/libs/vkd3d-shader/spirv.c | 229 +++-- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 349 +++++-- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 30 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 13 +- + libs/vkd3d/libs/vkd3d/command.c | 141 +-- + libs/vkd3d/libs/vkd3d/device.c | 16 +- + libs/vkd3d/libs/vkd3d/resource.c | 14 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 3 + + 22 files changed, 3073 insertions(+), 958 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index a9d709d10fe..e7b25602ec0 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -52,6 +52,10 @@ + ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ + | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) + ++#define VKD3D_EXPAND(x) x ++#define VKD3D_STRINGIFY(x) #x ++#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) ++ + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') + #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') + #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +@@ -233,7 +237,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index f60ef7db769..c2c6ad67804 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -20,6 +20,7 @@ + #define WIDL_C_INLINE_WRAPPERS + #endif + #define COBJMACROS ++ + #define CONST_VTABLE + #include "vkd3d.h" + #include "vkd3d_blob.h" +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 9abc2c4db70..8a3eb5a367a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_PHASE ] = "phase", + [VKD3DSIH_PHI ] = "phi", + [VKD3DSIH_POW ] = "pow", ++ [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", ++ [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", ++ [VKD3DSIH_QUAD_READ_ACROSS_Y ] = 
"quad_read_across_y", ++ [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", + [VKD3DSIH_RCP ] = "rcp", + [VKD3DSIH_REP ] = "rep", + [VKD3DSIH_RESINFO ] = "resinfo", +@@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + { + bool untyped = false; + +- switch (compiler->current->handler_idx) ++ switch (compiler->current->opcode) + { + case VKD3DSIH_MOV: + case VKD3DSIH_MOVC: +@@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_BREAKP: + case VKD3DSIH_CONTINUEP: +@@ -1853,8 +1857,13 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + break; + + case VKD3DSIH_TEX: +- if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) +- vkd3d_string_buffer_printf(buffer, "p"); ++ if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) ++ { ++ if (ins->flags & VKD3DSI_TEXLD_PROJECT) ++ vkd3d_string_buffer_printf(buffer, "p"); ++ else if (ins->flags & VKD3DSI_TEXLD_BIAS) ++ vkd3d_string_buffer_printf(buffer, "b"); ++ } + break; + + case VKD3DSIH_WAVE_OP_ADD: +@@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + if (ins->coissue) + vkd3d_string_buffer_printf(buffer, "+"); + +- shader_print_opcode(compiler, ins->handler_idx); ++ shader_print_opcode(compiler, ins->opcode); + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: +@@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: +@@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + + shader_dump_instruction(&compiler, ins); + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_IF: +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index bfd5b52b436..4522d56c5c9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) + + static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) + { +- if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) ++ if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) + { + vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, + "Ignoring unexpected instruction flags %#x.", ins->flags); +@@ -1142,23 +1142,23 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + goto fail; + } + +- if (ins->handler_idx == VKD3DSIH_DCL) ++ if (ins->opcode == VKD3DSIH_DCL) + { + shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); + } +- else if (ins->handler_idx == VKD3DSIH_DEF) ++ else if (ins->opcode == VKD3DSIH_DEF) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } +- else if 
(ins->handler_idx == VKD3DSIH_DEFB) ++ else if (ins->opcode == VKD3DSIH_DEFB) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } +- else if (ins->handler_idx == VKD3DSIH_DEFI) ++ else if (ins->opcode == VKD3DSIH_DEFI) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); +@@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + return; + + fail: +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + *ptr = sm1->end; + } + +@@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + ins = &instructions->elements[instructions->count]; + shader_sm1_read_instruction(&sm1, ins); + +- if (ins->handler_idx == VKD3DSIH_INVALID) ++ if (ins->opcode == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + vsir_program_cleanup(program); +@@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + return ret; + } + +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) ++bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) + { + unsigned int i; + +@@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + unsigned int offset; + } + register_table[] = + { +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, +- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, +- +- {"color", true, 
VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, ++ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) + && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type +- && ctx->profile->major_version == register_table[i].major_version) ++ && version->type == register_table[i].shader_type ++ && version->major == register_table[i].major_version) + { + *type = register_table[i].type; +- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) ++ if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) + *reg = register_table[i].offset; + else +- *reg = semantic->index; ++ *reg = semantic_index; + return true; + } + } +@@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + return false; + } + +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t 
*usage_idx) ++bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) + { + static const struct + { +@@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { +- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) ++ if (!ascii_strcasecmp(semantic_name, semantics[i].name)) + { + *usage = semantics[i].usage; +- *usage_idx = semantic->index; ++ *usage_idx = semantic_index; + return true; + } + } +@@ -1465,6 +1466,17 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU + return false; + } + ++struct d3dbc_compiler ++{ ++ struct vsir_program *program; ++ struct vkd3d_bytecode_buffer buffer; ++ struct vkd3d_shader_message_context *message_context; ++ ++ /* OBJECTIVE: Store all the required information in the other fields so ++ * that this hlsl_ctx is no longer necessary. */ ++ struct hlsl_ctx *ctx; ++}; ++ + static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) + { + if (type == VKD3D_SHADER_TYPE_VERTEX) +@@ -1497,13 +1509,16 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPC_OBJECT; ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: + break; + } + +@@ -1593,13 +1608,16 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPT_VERTEXSHADER; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: + break; + } + +@@ -1677,8 +1695,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) + list_move_tail(&ctx->extern_vars, &sorted); + } + +-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- struct hlsl_ir_function_decl *entry_func) ++void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) + { + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; +@@ -1739,11 +1756,11 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + } + else + { +- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); + put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ +- put_u32(buffer, 0); /* FIXME: default value */ ++ put_u32(buffer, 0); /* default value */ + } + } + +@@ -1767,6 +1784,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); ++ ++ if (var->default_values) ++ { ++ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int comp_count = hlsl_type_component_count(var->data_type); ++ unsigned int 
default_value_offset; ++ unsigned int k; ++ ++ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); ++ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); ++ ++ for (k = 0; k < comp_count; ++k) ++ { ++ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ unsigned int comp_offset; ++ enum hlsl_regset regset; ++ ++ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ union ++ { ++ uint32_t u; ++ float f; ++ } uni; ++ ++ switch (comp_type->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ hlsl_fixme(ctx, &var->loc, "Write double default values."); ++ uni.u = 0; ++ break; ++ ++ case HLSL_TYPE_INT: ++ uni.f = var->default_values[k].value.i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ uni.f = var->default_values[k].value.u; ++ break; ++ ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ uni.u = var->default_values[k].value.u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); ++ } ++ } ++ } ++ + ++uniform_count; + } + } +@@ -1778,7 +1851,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + } + +-static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) ++static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) + { + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +@@ -1791,7 +1864,7 @@ struct sm1_instruction + + struct sm1_dst_register + { +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; +@@ -1799,16 +1872,42 @@ struct sm1_instruction + + struct sm1_src_register + { +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; +- } srcs[3]; ++ } srcs[4]; + unsigned int src_count; + + unsigned int has_dst; + }; + ++static bool is_inconsequential_instr(const struct sm1_instruction *instr) ++{ ++ const struct sm1_src_register *src = &instr->srcs[0]; ++ const struct sm1_dst_register *dst = &instr->dst; ++ unsigned int i; ++ ++ if (instr->opcode != D3DSIO_MOV) ++ return false; ++ if (dst->mod != D3DSPDM_NONE) ++ return false; ++ if (src->mod != D3DSPSM_NONE) ++ return false; ++ if (src->type != dst->type) ++ return false; ++ if (src->reg != dst->reg) ++ return false; ++ ++ for (i = 0; i < 4; ++i) ++ { ++ if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) ++ return false; ++ } ++ ++ return true; ++} ++ + static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) + { + assert(reg->writemask); +@@ -1821,15 +1920,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); + } + +-static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct sm1_instruction *instr) ++static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) + { ++ const struct vkd3d_shader_version *version = 
&d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + uint32_t token = instr->opcode; + unsigned int i; + ++ if (is_inconsequential_instr(instr)) ++ return; ++ + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -1845,54 +1948,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); + } + +-static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, +- const struct hlsl_reg *src3) ++static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) + { + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].type = VKD3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, +- const struct hlsl_reg *src2, const struct hlsl_reg *src3) ++static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].type = VKD3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, +@@ -1901,26 +2003,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const 
struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, +@@ -1928,49 +2029,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) ++static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src, ++ D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, +@@ -1978,16 +2078,16 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_cast(struct 
d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + + /* Narrowing casts were already lowered. */ + assert(src_type->dimx == dst_type->dimx); +@@ -2004,7 +2104,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_TYPE_DOUBLE: +@@ -2028,7 +2128,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + break; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_TYPE_BOOL: +@@ -2057,8 +2157,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + +-static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) +@@ -2067,12 +2170,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { +- .type = D3DSPR_CONST, ++ .type = VKD3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = constant_reg->index, + }; + +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2082,32 +2185,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + } + } + +-static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_var *var, bool output) ++static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, ++ const struct signature_element *element, bool output) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + +- if ((!output && !var->last_read) || (output && !var->first_write)) +- return; +- +- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) ++ if (hlsl_sm1_register_from_semantic(version, element->semantic_name, ++ element->semantic_index, output, ®.type, ®.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { +- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); ++ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); + assert(ret); +- reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; +- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; ++ reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ reg.reg = element->register_index; + } + + token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2116,39 +2219,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + +- reg.writemask = (1 << var->data_type->dimx) - 1; ++ reg.writemask = element->mask; + write_sm1_dst_register(buffer, ®); + } + +-static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) + { ++ struct vsir_program *program = d3dbc->program; ++ const struct vkd3d_shader_version *version; + bool write_in = false, write_out = false; +- struct hlsl_ir_var *var; + +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) ++ version = &program->shader_version; ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) + write_in = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) ++ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) + write_in = write_out = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) ++ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) + write_in = true; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ if (write_in) ++ { ++ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) ++ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); ++ } ++ ++ if (write_out) + { +- if (write_in && var->is_input_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, false); +- if (write_out && var->is_output_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, true); ++ for (unsigned int i = 0; i < program->output_signature.element_count; ++i) ++ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); + } + } + +-static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2175,20 +2286,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + +- reg.type = D3DSPR_SAMPLER; ++ reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, ®); + } + +-static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + +- if (ctx->profile->major_version < 2) ++ 
if (version->major < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +@@ -2210,27 +2323,26 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + continue; + } + +- reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; +- write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); ++ reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; ++ d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); + } + } + } + } + +-static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_CONST, ++ .srcs[0].type = VKD3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, +@@ -2239,10 +2351,10 @@ static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + assert(instr->reg.allocated); + assert(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); +@@ -2255,28 +2367,30 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); +- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); ++ d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); + } + } + +-static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + + assert(instr->reg.allocated); + + if (expr->op == HLSL_OP1_REINTERPRET) + { +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + return; + } + + if (expr->op == HLSL_OP1_CAST) + { +- write_sm1_cast(ctx, buffer, instr); ++ d3dbc_write_cast(d3dbc, instr); + return; + } + +@@ -2290,70 +2404,70 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + switch (expr->op) + { + case HLSL_OP1_ABS: +- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSX: +- write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, 
&arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: +- write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); + break; + + case HLSL_OP2_ADD: +- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: +@@ -2362,27 +2476,27 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + break; + + case HLSL_OP2_LOGIC_AND: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_LOGIC_OR: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_SLT: +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) + hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel 
shaders."); +- write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP3_CMP: +- if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ if (version->type == VKD3D_SHADER_TYPE_VERTEX) + hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); +- write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + case HLSL_OP3_DP2ADD: +- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: +@@ -2391,10 +2505,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + +-static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block); ++static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); + +-static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_if *iff = hlsl_ir_if(instr); + const struct hlsl_ir_node *condition; +@@ -2408,33 +2521,33 @@ static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + .opcode = D3DSIO_IFC, + .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[0].reg = condition->reg.id, + .srcs[0].mod = 0, + +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[1].reg = condition->reg.id, + .srcs[1].mod = D3DSPSM_NEG, + + .src_count = 2, + }; +- write_sm1_instruction(ctx, buffer, &sm1_ifc); +- write_sm1_block(ctx, buffer, &iff->then_block); ++ d3dbc_write_instruction(d3dbc, &sm1_ifc); ++ d3dbc_write_block(d3dbc, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; +- write_sm1_instruction(ctx, buffer, &sm1_else); +- write_sm1_block(ctx, buffer, &iff->else_block); ++ d3dbc_write_instruction(d3dbc, &sm1_else); ++ d3dbc_write_block(d3dbc, &iff->else_block); + } + + sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; +- write_sm1_instruction(ctx, buffer, &sm1_endif); ++ d3dbc_write_instruction(d3dbc, &sm1_endif); + } + +-static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + +@@ -2448,35 +2561,36 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + { + .opcode = D3DSIO_TEXKILL, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = reg->id, + .dst.writemask = reg->writemask, + .has_dst = 1, + }; + +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + break; + } + + default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ 
hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + } + } + +-static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, +@@ -2487,15 +2601,15 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + if (load->src.var->is_uniform) + { + assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_CONST; ++ sm1_instr.srcs[0].type = VKD3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { +- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, +- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) ++ if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, ++ load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_INPUT; ++ sm1_instr.srcs[0].type = VKD3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else +@@ -2503,32 +2617,34 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; ++ struct hlsl_ir_node *ddx = load->ddx.node; ++ struct hlsl_ir_node *ddy = load->ddy.node; + unsigned int sampler_offset, reg_id; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); +- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; ++ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), + +- .srcs[1].type = D3DSPR_SAMPLER, ++ .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + +@@ -2546,6 +2662,25 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << 
VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + break; + ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ sm1_instr.opcode = D3DSIO_TEX; ++ sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ sm1_instr.opcode = D3DSIO_TEXLDD; ++ ++ sm1_instr.srcs[2].type = VKD3DSPR_TEMP; ++ sm1_instr.srcs[2].reg = ddx->reg.id; ++ sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); ++ ++ sm1_instr.srcs[3].type = VKD3DSPR_TEMP; ++ sm1_instr.srcs[3].reg = ddy->reg.id; ++ sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); ++ ++ sm1_instr.src_count += 2; ++ break; ++ + default: + hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); + return; +@@ -2553,25 +2688,26 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + + assert(instr->reg.allocated); + +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + const struct hlsl_ir_store *store = hlsl_ir_store(instr); +- const struct hlsl_ir_node *rhs = store->rhs.node; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); ++ const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, +@@ -2585,16 +2721,16 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + + if (store->lhs.var->is_output_semantic) + { +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) + { +- sm1_instr.dst.type = D3DSPR_TEMP; ++ sm1_instr.dst.type = VKD3DSPR_TEMP; + sm1_instr.dst.reg = 0; + } +- else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, +- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) ++ else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, ++ store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); +- sm1_instr.dst.type = D3DSPR_OUTPUT; ++ sm1_instr.dst.type = VKD3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else +@@ -2604,11 +2740,10 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + assert(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = 
swizzle->val.node; +@@ -2616,12 +2751,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), +@@ -2631,12 +2766,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + assert(instr->reg.allocated); + assert(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block) ++static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) + { ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +@@ -2656,38 +2791,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: +- write_sm1_constant(ctx, buffer, instr); ++ d3dbc_write_constant(d3dbc, instr); + break; + + case HLSL_IR_EXPR: +- write_sm1_expr(ctx, buffer, instr); ++ d3dbc_write_expr(d3dbc, instr); + break; + + case HLSL_IR_IF: + if (hlsl_version_ge(ctx, 2, 1)) +- write_sm1_if(ctx, buffer, instr); ++ d3dbc_write_if(d3dbc, instr); + else + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + break; + + case HLSL_IR_JUMP: +- write_sm1_jump(ctx, buffer, instr); ++ d3dbc_write_jump(d3dbc, instr); + break; + + case HLSL_IR_LOAD: +- write_sm1_load(ctx, buffer, instr); ++ d3dbc_write_load(d3dbc, instr); + break; + + case HLSL_IR_RESOURCE_LOAD: +- write_sm1_resource_load(ctx, buffer, instr); ++ d3dbc_write_resource_load(d3dbc, instr); + break; + + case HLSL_IR_STORE: +- write_sm1_store(ctx, buffer, instr); ++ d3dbc_write_store(d3dbc, instr); + break; + + case HLSL_IR_SWIZZLE: +- write_sm1_swizzle(ctx, buffer, instr); ++ d3dbc_write_swizzle(d3dbc, instr); + break; + + default: +@@ -2696,32 +2831,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + } + } + +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving ++ * data from the other parameters instead, so it can be removed as an argument ++ * and be declared in vkd3d_shader_private.h and used without relying on HLSL ++ * IR structs. 
*/ ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { +- struct vkd3d_bytecode_buffer buffer = {0}; ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ struct d3dbc_compiler d3dbc = {0}; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; ++ ++ d3dbc.ctx = ctx; ++ d3dbc.program = program; ++ d3dbc.message_context = message_context; + +- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); ++ put_u32(buffer, sm1_version(version->type, version->major, version->minor)); + +- write_sm1_uniforms(ctx, &buffer, entry_func); ++ bytecode_put_bytes(buffer, ctab->code, ctab->size); + +- write_sm1_constant_defs(ctx, &buffer); +- write_sm1_semantic_dcls(ctx, &buffer); +- write_sm1_sampler_dcls(ctx, &buffer); +- write_sm1_block(ctx, &buffer, &entry_func->body); ++ d3dbc_write_constant_defs(&d3dbc); ++ d3dbc_write_semantic_dcls(&d3dbc); ++ d3dbc_write_sampler_dcls(&d3dbc); ++ d3dbc_write_block(&d3dbc, &entry_func->body); + +- put_u32(&buffer, D3DSIO_END); ++ put_u32(buffer, D3DSIO_END); + +- if (buffer.status) +- ctx->result = buffer.status; ++ if (buffer->status) ++ ctx->result = buffer->status; + + if (!ctx->result) + { +- out->code = buffer.data; +- out->size = buffer.size; ++ out->code = buffer->data; ++ out->size = buffer->size; + } + else + { +- vkd3d_free(buffer.data); ++ vkd3d_free(buffer->data); + } + return ctx->result; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 73a8d8687c5..2176debc7d2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -458,6 +458,8 @@ enum dx_intrinsic_opcode + DX_WAVE_ACTIVE_OP = 119, + DX_WAVE_ACTIVE_BIT = 120, + DX_WAVE_PREFIX_OP = 121, ++ DX_QUAD_READ_LANE_AT = 122, ++ DX_QUAD_OP = 123, + DX_LEGACY_F32TOF16 = 130, + DX_LEGACY_F16TOF32 = 131, + DX_WAVE_ALL_BIT_COUNT = 135, +@@ -576,6 +578,13 @@ enum dxil_wave_op_kind + WAVE_OP_MAX = 3, + }; + ++enum dxil_quad_op_kind ++{ ++ QUAD_READ_ACROSS_X = 0, ++ QUAD_READ_ACROSS_Y = 1, ++ QUAD_READ_ACROSS_D = 2, ++}; ++ + struct sm6_pointer_info + { + const struct sm6_type *type; +@@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + for (i = 0; i < sm6->p.program->instructions.count; ++i) + { + ins = &sm6->p.program->instructions.elements[i]; +- if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) ++ if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) + { + ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( + (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); + } +- else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) ++ else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) + { + ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); + } +- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) ++ else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) + { + ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } +- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) ++ else if (ins->opcode == 
VKD3DSIH_DCL_TGSM_STRUCTURED) + { + ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; +@@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record + code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); + } + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, +@@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co + return VKD3DSIH_IMAX; + case DX_IMIN: + return VKD3DSIH_IMIN; ++ case DX_QUAD_READ_LANE_AT: ++ return VKD3DSIH_QUAD_READ_LANE_AT; + case DX_UMAX: + return VKD3DSIH_UMAX; + case DX_UMIN: +@@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int + reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); + + /* NOP is used to flag no instruction emitted. */ +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, +@@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr + sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); + } + ++static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) ++{ ++ switch (op) ++ { ++ case QUAD_READ_ACROSS_X: ++ return VKD3DSIH_QUAD_READ_ACROSS_X; ++ case QUAD_READ_ACROSS_Y: ++ return VKD3DSIH_QUAD_READ_ACROSS_Y; ++ case QUAD_READ_ACROSS_D: ++ return VKD3DSIH_QUAD_READ_ACROSS_D; ++ default: ++ return VKD3DSIH_INVALID; ++ } ++} ++ ++static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ enum vkd3d_shader_opcode opcode; ++ enum dxil_quad_op_kind quad_op; ++ ++ quad_op = sm6_value_get_constant_uint(operands[1]); ++ if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) ++ { ++ FIXME("Unhandled quad op kind %u.\n", quad_op); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, ++ "Quad op kind %u is unhandled.", quad_op); ++ return; ++ } ++ ++ vsir_instruction_init(ins, &sm6->p.location, opcode); ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, + [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, + [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, ++ [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, ++ [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, + [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, + [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, + [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, 
+@@ -6381,7 +6435,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade + { + const struct sm6_type *type; + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + + if (!dst->type) + return; +@@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor + { + *dst = *value; + dst->type = type; +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + return; + } + +@@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor + * do not otherwise occur, so deleting these avoids the need for backend support. */ + if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) + { +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + *dst = *a; + return; + } +@@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record + reg->idx_count = 2; + dst->structure_stride = src->structure_stride; + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record + incoming[j].block = sm6_function_get_block(function, record->operands[i + 1], sm6); + } + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + + qsort(incoming, phi->incoming_count, sizeof(*incoming), phi_incoming_compare); + +@@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record + + code_block->terminator.type = TERMINATOR_RET; + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec + terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); + } + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + } + + ins = &code_block->instructions[code_block->instruction_count]; +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + + dst = sm6_parser_get_current_value(sm6); + fwd_type = dst->type; +@@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + + if (sm6->p.failed) + return VKD3D_ERROR; +- assert(ins->handler_idx != VKD3DSIH_INVALID); + + if (record->attachment) + metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); +@@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; + } + if (code_block) +- code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; +- else +- assert(ins->handler_idx == VKD3DSIH_NOP); ++ code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; + + if (dst->type && fwd_type && dst->type != fwd_type) + { +@@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc + + if (!m) + { +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ++ ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->declaration.raw_resource.resource.reg.write_mask = 0; + return &ins->declaration.raw_resource.resource; + } +@@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc + "A typed resource has no data type."); + } + +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; ++ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; + ins->declaration.semantic.resource_type = resource_type; +@@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc + } + else if (kind == RESOURCE_KIND_RAWBUFFER) + { +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ++ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->declaration.raw_resource.resource.reg.write_mask = 0; + + return &ins->declaration.raw_resource.resource; + } + else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) + { +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; ++ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; + ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; + ins->declaration.structured_resource.resource.reg.write_mask = 0; + +@@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, + d->kind = kind; + d->reg_type = VKD3DSPR_RESOURCE; + d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; +- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) ++ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) + ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; + + init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); +@@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, + d->kind = values[0]; + d->reg_type = VKD3DSPR_UAV; + d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; +- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) ++ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) + ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; + + init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 57b4ac24212..bd2ad1290cd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -56,6 +56,70 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) + vkd3d_free(string_entry); + } + ++struct state_block_function_info ++{ ++ const char *name; ++ unsigned int min_args, max_args; ++}; ++ ++static const struct state_block_function_info *get_state_block_function_info(const char *name) ++{ ++ static const struct state_block_function_info valid_functions[] = ++ { ++ {"SetBlendState", 3, 3}, ++ {"SetDepthStencilState", 2, 2}, ++ {"SetRasterizerState", 1, 1}, ++ {"SetVertexShader", 1, 1}, ++ {"SetDomainShader", 1, 1}, ++ {"SetHullShader", 1, 1}, ++ {"SetGeometryShader", 1, 1}, ++ {"SetPixelShader", 1, 1}, ++ {"SetComputeShader", 1, 1}, ++ {"OMSetRenderTargets", 2, 9}, ++ }; ++ ++ for (unsigned int i = 0; i < ARRAY_SIZE(valid_functions); ++i) ++ { ++ if (!strcmp(name, valid_functions[i].name)) ++ return &valid_functions[i]; ++ } ++ return NULL; ++} ++ ++bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, ++ const struct vkd3d_shader_location *loc) ++{ ++ if (entry->is_function_call) ++ { ++ const struct state_block_function_info *info = get_state_block_function_info(entry->name); ++ ++ if (!info) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid state block function '%s'.", entry->name); ++ return false; ++ } ++ if (entry->args_count < info->min_args || entry->args_count > info->max_args) ++ { ++ if (info->min_args == info->max_args) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid argument count for state block function '%s' (expected %u).", ++ entry->name, info->min_args); ++ } ++ else ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid argument count for state block function '%s' (expected from %u to %u).", ++ entry->name, info->min_args, info->max_args); ++ } ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + struct fx_write_context; + + struct fx_write_context_ops +@@ -63,6 +127,7 @@ struct fx_write_context_ops + uint32_t (*write_string)(const char *string, struct fx_write_context *fx); + void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); ++ void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); + bool are_child_effects_supported; + }; + +@@ -94,6 +159,8 @@ struct fx_write_context + uint32_t texture_count; + uint32_t uav_count; + uint32_t sampler_state_count; ++ uint32_t depth_stencil_state_count; ++ uint32_t rasterizer_state_count; + int status; + + bool child_effect; +@@ -128,8 +195,41 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) + fx->ops->write_pass(var, fx); + } + ++static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) ++{ ++ struct hlsl_ctx *ctx = fx->ctx; ++ struct hlsl_ir_var *v; ++ uint32_t count = 0; ++ ++ if (!scope) ++ return 0; ++ ++ LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (!v->default_values) ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ 
"Annotation variable is missing default value."); ++ ++ fx->ops->write_annotation(v, fx); ++ ++count; ++ } ++ ++ return count; ++} ++ ++static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t count_offset, count; ++ ++ count_offset = put_u32(buffer, 0); ++ count = write_annotations(scope, fx); ++ set_u32(buffer, count_offset, count); ++} ++ + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); + static const char * get_fx_4_type_name(const struct hlsl_type *type); ++static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); + + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) + { +@@ -279,9 +379,9 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx + name_offset = write_string(var->name, fx); + put_u32(buffer, name_offset); + put_u32(buffer, 0); /* Assignment count. */ +- put_u32(buffer, 0); /* Annotation count. */ + +- /* TODO: annotations */ ++ write_fx_4_annotations(var->annotations, fx); ++ + /* TODO: assignments */ + } + +@@ -402,6 +502,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + case HLSL_CLASS_UAV: + return uav_type_names[type->sampler_dim]; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ return "DepthStencilState"; ++ + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + return "DepthStencilView"; + +@@ -421,10 +524,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) + { ++ struct field_offsets ++ { ++ uint32_t name; ++ uint32_t semantic; ++ uint32_t offset; ++ uint32_t type; ++ }; ++ uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; +- uint32_t name_offset, offset, size, stride, numeric_desc; ++ struct field_offsets *field_offsets = NULL; ++ struct hlsl_ctx *ctx = fx->ctx; + uint32_t elements_count = 0; + const char *name; ++ size_t i; + + /* Resolve arrays to element type and number of elements. 
*/ + if (type->class == HLSL_CLASS_ARRAY) +@@ -436,6 +549,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + name = get_fx_4_type_name(type); + + name_offset = write_string(name, fx); ++ if (type->class == HLSL_CLASS_STRUCT) ++ { ++ if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) ++ return 0; ++ ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ const struct hlsl_struct_field *field = &type->e.record.fields[i]; ++ ++ field_offsets[i].name = write_string(field->name, fx); ++ field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); ++ field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; ++ field_offsets[i].type = write_type(field->type, fx); ++ } ++ } ++ + offset = put_u32_unaligned(buffer, name_offset); + + switch (type->class) +@@ -446,8 +575,10 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + put_u32_unaligned(buffer, 1); + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: +@@ -464,6 +595,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_CONSTANT_BUFFER: + vkd3d_unreachable(); + + case HLSL_CLASS_STRING: +@@ -473,34 +605,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + return 0; + } + +- size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); ++ /* Structures can only contain numeric fields, this is validated during variable declaration. */ ++ total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); ++ packed_size = 0; ++ if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) ++ packed_size = hlsl_type_component_count(type) * sizeof(float); + if (elements_count) +- size *= elements_count; ++ { ++ total_size *= elements_count; ++ packed_size *= elements_count; ++ } + stride = align(stride, 4 * sizeof(float)); + + put_u32_unaligned(buffer, elements_count); +- put_u32_unaligned(buffer, size); /* Total size. */ +- put_u32_unaligned(buffer, stride); /* Stride. 
*/ +- put_u32_unaligned(buffer, size); ++ put_u32_unaligned(buffer, total_size); ++ put_u32_unaligned(buffer, stride); ++ put_u32_unaligned(buffer, packed_size); + + if (type->class == HLSL_CLASS_STRUCT) + { +- size_t i; +- + put_u32_unaligned(buffer, type->e.record.field_count); + for (i = 0; i < type->e.record.field_count; ++i) + { +- const struct hlsl_struct_field *field = &type->e.record.fields[i]; +- uint32_t semantic_offset, field_type_offset; ++ const struct field_offsets *field = &field_offsets[i]; + +- name_offset = write_string(field->name, fx); +- semantic_offset = write_string(field->semantic.name, fx); +- field_type_offset = write_type(field->type, fx); ++ put_u32_unaligned(buffer, field->name); ++ put_u32_unaligned(buffer, field->semantic); ++ put_u32_unaligned(buffer, field->offset); ++ put_u32_unaligned(buffer, field->type); ++ } + +- put_u32_unaligned(buffer, name_offset); +- put_u32_unaligned(buffer, semantic_offset); +- put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); +- put_u32_unaligned(buffer, field_type_offset); ++ if (ctx->profile->major_version == 5) ++ { ++ put_u32_unaligned(buffer, 0); /* Base class type */ ++ put_u32_unaligned(buffer, 0); /* Interface count */ + } + } + else if (type->class == HLSL_CLASS_TEXTURE) +@@ -556,6 +694,14 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + { + put_u32_unaligned(buffer, 6); + } ++ else if (type->class == HLSL_CLASS_RASTERIZER_STATE) ++ { ++ put_u32_unaligned(buffer, 4); ++ } ++ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) ++ { ++ put_u32_unaligned(buffer, 3); ++ } + else if (hlsl_is_numeric_type(type)) + { + numeric_desc = get_fx_4_numeric_type_description(type, fx); +@@ -565,9 +711,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + { + FIXME("Type %u is not supported.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); +- return 0; + } + ++ vkd3d_free(field_offsets); + return offset; + } + +@@ -581,8 +727,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex + name_offset = write_string(var->name, fx); + put_u32(buffer, name_offset); + count_offset = put_u32(buffer, 0); +- put_u32(buffer, 0); /* Annotation count. */ ++ write_fx_4_annotations(var->annotations, fx); + ++ count = 0; + LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) + { + write_pass(pass, fx); +@@ -617,7 +764,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) + + put_u32(buffer, name_offset); + count_offset = put_u32(buffer, 0); /* Technique count */ +- put_u32(buffer, 0); /* Annotation count */ ++ write_fx_4_annotations(var ? var->annotations : NULL, fx); + + count = fx->technique_count; + write_techniques(var ? var->scope : fx->ctx->globals, fx); +@@ -683,7 +830,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + } + + name_offset = write_string(name, fx); +- semantic_offset = write_string(semantic->name, fx); ++ semantic_offset = semantic->raw_name ? 
write_string(semantic->raw_name, fx) : 0; + + offset = put_u32(buffer, hlsl_sm1_base_type(type)); + put_u32(buffer, hlsl_sm1_class(type)); +@@ -794,6 +941,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + /* FIXME: write actual initial value */ ++ if (var->default_values) ++ hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); ++ + offset = put_u32(buffer, 0); + + for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) +@@ -850,8 +1000,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); + return false; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_VOID: + return false; +@@ -859,6 +1011,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_CONSTANT_BUFFER: + /* This cannot appear as an extern variable. */ + break; + } +@@ -972,9 +1125,72 @@ static const struct fx_write_context_ops fx_4_ops = + .write_string = write_fx_4_string, + .write_technique = write_fx_4_technique, + .write_pass = write_fx_4_pass, ++ .write_annotation = write_fx_4_annotation, + .are_child_effects_supported = true, + }; + ++static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, ++ struct fx_write_context *fx) ++{ ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); ++ uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ struct hlsl_ctx *ctx = fx->ctx; ++ uint32_t offset = buffer->size; ++ unsigned int comp_count; ++ ++ if (!value) ++ return 0; ++ ++ comp_count = hlsl_type_component_count(type); ++ ++ for (i = 0; i < elements_count; ++i) ++ { ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ { ++ switch (type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ ++ for (j = 0; j < comp_count; ++j) ++ { ++ put_u32_unaligned(buffer, value->value.u); ++ value++; ++ } ++ break; ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", ++ type->e.numeric.type); ++ } ++ ++ break; ++ } ++ case HLSL_CLASS_STRUCT: ++ { ++ struct hlsl_struct_field *fields = type->e.record.fields; ++ ++ for (j = 0; j < type->e.record.field_count; ++j) ++ { ++ write_fx_4_default_value(fields[i].type, value, fx); ++ value += hlsl_type_component_count(fields[i].type); ++ } ++ break; ++ } ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); ++ } ++ } ++ ++ return offset; ++} ++ + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->structured; +@@ -984,22 +1200,20 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st + { + HAS_EXPLICIT_BIND_POINT = 0x4, + }; +- struct hlsl_ctx *ctx = fx->ctx; + +- /* Explicit bind point. 
*/ +- if (var->reg_reservation.reg_type) ++ if (var->has_explicit_bind_point) + flags |= HAS_EXPLICIT_BIND_POINT; + + type_offset = write_type(var->data_type, fx); + name_offset = write_string(var->name, fx); +- semantic_offset = write_string(var->semantic.name, fx); ++ semantic_offset = write_string(var->semantic.raw_name, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); + + semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ +- put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ +- value_offset = put_u32(buffer, 0); /* Default value offset */ ++ put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. */ ++ value_offset = put_u32(buffer, 0); + put_u32(buffer, flags); /* Flags */ + + if (shared) +@@ -1008,17 +1222,39 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st + } + else + { +- /* FIXME: write default value */ +- set_u32(buffer, value_offset, 0); ++ uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); ++ set_u32(buffer, value_offset, offset); + +- put_u32(buffer, 0); /* Annotations count */ +- if (has_annotations(var)) +- hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); ++ write_fx_4_annotations(var->annotations, fx); + + fx->numeric_variable_count++; + } + } + ++static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t name_offset, type_offset, offset; ++ struct hlsl_ctx *ctx = fx->ctx; ++ ++ name_offset = write_string(var->name, fx); ++ type_offset = write_type(var->data_type, fx); ++ ++ put_u32(buffer, name_offset); ++ put_u32(buffer, type_offset); ++ ++ if (hlsl_is_numeric_type(type)) ++ { ++ offset = write_fx_4_default_value(var->data_type, var->default_values, fx); ++ put_u32(buffer, offset); ++ } ++ else ++ { ++ hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); ++ } ++} ++ + struct rhs_named_value + { + const char *name; +@@ -1118,6 +1354,9 @@ static bool state_block_contains_state(const char *name, unsigned int start, str + + for (i = start; i < block->count; ++i) + { ++ if (block->entries[i]->is_function_call) ++ continue; ++ + if (!ascii_strcasecmp(block->entries[i]->name, name)) + return true; + } +@@ -1160,6 +1399,41 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no + return true; + } + ++static void fold_state_value(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry) ++{ ++ bool progress; ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); ++ } while (progress); ++} ++ ++enum state_property_component_type ++{ ++ FX_BOOL, ++ FX_FLOAT, ++ FX_UINT, ++ FX_UINT8, ++}; ++ ++static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) ++{ ++ switch (type) ++ { ++ case FX_BOOL: ++ return HLSL_TYPE_BOOL; ++ case FX_FLOAT: ++ return HLSL_TYPE_FLOAT; ++ case FX_UINT: ++ case FX_UINT8: ++ return HLSL_TYPE_UINT; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ + static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, + struct fx_write_context *fx) + { +@@ -1209,37 +1483,112 @@ static void 
resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + { NULL } + }; + ++ static const struct rhs_named_value depth_write_mask_values[] = ++ { ++ { "ZERO", 0 }, ++ { "ALL", 1 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value comparison_values[] = ++ { ++ { "NEVER", 1 }, ++ { "LESS", 2 }, ++ { "EQUAL", 3 }, ++ { "LESS_EQUAL", 4 }, ++ { "GREATER", 5 }, ++ { "NOT_EQUAL", 6 }, ++ { "GREATER_EQUAL", 7 }, ++ { "ALWAYS", 8 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value stencil_op_values[] = ++ { ++ { "KEEP", 1 }, ++ { "ZERO", 2 }, ++ { "REPLACE", 3 }, ++ { "INCR_SAT", 4 }, ++ { "DECR_SAT", 5 }, ++ { "INVERT", 6 }, ++ { "INCR", 7 }, ++ { "DECR", 8 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value fill_values[] = ++ { ++ { "WIREFRAME", 2 }, ++ { "SOLID", 3 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value cull_values[] = ++ { ++ { "NONE", 1 }, ++ { "FRONT", 2 }, ++ { "BACK", 3 }, ++ { NULL } ++ }; ++ + static const struct state + { + const char *name; + enum hlsl_type_class container; +- enum hlsl_base_type type; ++ enum hlsl_type_class class; ++ enum state_property_component_type type; + unsigned int dimx; + uint32_t id; + const struct rhs_named_value *values; + } + states[] = + { +- { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, +- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, +- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, +- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, +- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, +- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, +- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, +- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, +- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, +- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, ++ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 12, fill_values }, ++ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 13, cull_values }, ++ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 14 }, ++ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 15 }, ++ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 16 }, ++ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 17 }, ++ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 18 }, ++ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 19 }, ++ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 20 }, ++ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 21 }, ++ ++ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 22 }, ++ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 23, depth_write_mask_values }, ++ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 24, comparison_values }, ++ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 25 }, ++ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 26 }, ++ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 27 }, ++ { "FrontFaceStencilFail", 
HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 28, stencil_op_values }, ++ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 29, stencil_op_values }, ++ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 30, stencil_op_values }, ++ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 31, comparison_values }, ++ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 32, stencil_op_values }, ++ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 33, stencil_op_values }, ++ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 34, stencil_op_values }, ++ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 35, comparison_values }, ++ ++ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 45, filter_values }, ++ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 46, address_values }, ++ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 47, address_values }, ++ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 48, address_values }, ++ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 49 }, ++ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 50 }, ++ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 51, compare_func_values }, ++ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 52 }, ++ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 53 }, ++ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 54 }, + /* TODO: "Texture" field */ + }; + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct replace_state_context replace_context; ++ struct hlsl_type *state_type = NULL; + struct hlsl_ir_node *node, *cast; + const struct state *state = NULL; + struct hlsl_ctx *ctx = fx->ctx; +- struct hlsl_type *state_type; ++ enum hlsl_base_type base_type; + unsigned int i; +- bool progress; + + for (i = 0; i < ARRAY_SIZE(states); ++i) + { +@@ -1269,28 +1618,54 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + replace_context.values = state->values; + replace_context.var = var; + +- /* Turned named constants to actual constants. */ ++ /* Turn named constants to actual constants. */ + hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); ++ fold_state_value(ctx, entry); + +- if (state->dimx) +- state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); +- else +- state_type = hlsl_get_scalar_type(ctx, state->type); +- +- /* Cast to expected property type. */ +- node = entry->args->node; +- if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) +- return; +- list_add_after(&node->entry, &cast->entry); ++ /* Now cast and run folding again. 
*/ + +- hlsl_src_remove(entry->args); +- hlsl_src_from_node(entry->args, cast); ++ base_type = hlsl_type_from_fx_type(state->type); ++ switch (state->class) ++ { ++ case HLSL_CLASS_VECTOR: ++ state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); ++ break; ++ case HLSL_CLASS_SCALAR: ++ state_type = hlsl_get_scalar_type(ctx, base_type); ++ break; ++ case HLSL_CLASS_TEXTURE: ++ hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); ++ break; ++ default: ++ ; ++ } + +- do ++ if (state_type) + { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); +- } while (progress); ++ node = entry->args->node; ++ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) ++ return; ++ list_add_after(&node->entry, &cast->entry); ++ ++ /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. */ ++ if (state->type == FX_UINT8) ++ { ++ struct hlsl_ir_node *mask; ++ ++ if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) ++ return; ++ list_add_after(&cast->entry, &mask->entry); ++ ++ if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) ++ return; ++ list_add_after(&mask->entry, &cast->entry); ++ } ++ ++ hlsl_src_remove(entry->args); ++ hlsl_src_from_node(entry->args, cast); ++ ++ fold_state_value(ctx, entry); ++ } + } + + static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +@@ -1344,7 +1719,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + + type_offset = write_type(var->data_type, fx); + name_offset = write_string(var->name, fx); +- semantic_offset = write_string(var->semantic.name, fx); ++ semantic_offset = write_string(var->semantic.raw_name, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); +@@ -1383,19 +1758,27 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + fx->dsv_count += elements_count; + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ write_fx_4_state_object_initializer(var, fx); ++ fx->depth_stencil_state_count += elements_count; ++ break; ++ + case HLSL_CLASS_SAMPLER: + write_fx_4_state_object_initializer(var, fx); + fx->sampler_state_count += elements_count; + break; + ++ case HLSL_CLASS_RASTERIZER_STATE: ++ write_fx_4_state_object_initializer(var, fx); ++ fx->rasterizer_state_count += elements_count; ++ break; ++ + default: + hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", + type->e.numeric.type); + } + +- put_u32(buffer, 0); /* Annotations count */ +- if (has_annotations(var)) +- hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); ++ write_fx_4_annotations(var->annotations, fx); + + ++fx->object_variable_count; + } +@@ -1438,9 +1821,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + } + else + { +- put_u32(buffer, 0); /* Annotations count */ +- if (b->annotations) +- hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); ++ write_fx_4_annotations(b->annotations, fx); + ++fx->buffer_count; + } + +@@ -1464,6 +1845,9 @@ static void write_buffers(struct fx_write_context *fx, bool shared) + { + struct hlsl_buffer *buffer; + ++ if (shared && !fx->child_effect) ++ return; ++ + LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->size && !fx->include_empty_buffers) +@@ -1483,8 +1867,10 
@@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc + + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: +@@ -1551,9 +1937,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ + put_u32(&buffer, 0); /* String count. */ + put_u32(&buffer, fx.texture_count); +- put_u32(&buffer, 0); /* Depth stencil state count. */ ++ put_u32(&buffer, fx.depth_stencil_state_count); + put_u32(&buffer, 0); /* Blend state count. */ +- put_u32(&buffer, 0); /* Rasterizer state count. */ ++ put_u32(&buffer, fx.rasterizer_state_count); + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); + put_u32(&buffer, fx.dsv_count); +@@ -1609,9 +1995,9 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ + put_u32(&buffer, 0); /* String count. */ + put_u32(&buffer, fx.texture_count); +- put_u32(&buffer, 0); /* Depth stencil state count. */ ++ put_u32(&buffer, fx.depth_stencil_state_count); + put_u32(&buffer, 0); /* Blend state count. */ +- put_u32(&buffer, 0); /* Rasterizer state count. */ ++ put_u32(&buffer, fx.rasterizer_state_count); + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); + put_u32(&buffer, fx.dsv_count); +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 3e482a5fc70..8725724a239 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -48,9 +48,9 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne + static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + shader_glsl_print_indent(&gen->buffer, gen->indent); +- vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); ++ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->opcode); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); ++ "Internal compiler error: Unhandled instruction %#x.", ins->opcode); + } + + static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, +@@ -74,7 +74,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator + { + generator->location = instruction->location; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 99214fba6de..acf50869a40 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -167,6 +167,8 @@ void hlsl_free_var(struct hlsl_ir_var *decl) + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); + ++ vkd3d_free(decl->default_values); ++ + for (i = 0; i < decl->state_block_count; ++i) + hlsl_free_state_block(decl->state_blocks[i]); + vkd3d_free(decl->state_blocks); +@@ -367,15 +369,18 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + type->reg_size[HLSL_REGSET_UAVS] = 1; + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case 
HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: + break; + } + } +@@ -435,11 +440,13 @@ static bool type_is_single_component(const struct hlsl_type *type) + { + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: +@@ -450,6 +457,7 @@ static bool type_is_single_component(const struct hlsl_type *type) + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_CONSTANT_BUFFER: + return false; + + case HLSL_CLASS_EFFECT_GROUP: +@@ -528,6 +536,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + vkd3d_unreachable(); + } + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ { ++ *type_ptr = type->e.resource.format; ++ return traverse_path_from_component_index(ctx, type_ptr, index_ptr); ++ } ++ + default: + vkd3d_unreachable(); + } +@@ -556,12 +570,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + + switch (type->class) + { +- case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +- case HLSL_CLASS_MATRIX: + offset[HLSL_REGSET_NUMERIC] += idx; + break; + ++ case HLSL_CLASS_MATRIX: ++ offset[HLSL_REGSET_NUMERIC] += 4 * idx; ++ break; ++ + case HLSL_CLASS_STRUCT: + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + offset[r] += type->e.record.fields[idx].reg_offset[r]; +@@ -577,8 +593,10 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + } + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: +@@ -592,6 +610,8 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_CONSTANT_BUFFER: + vkd3d_unreachable(); + } + type = next_type; +@@ -865,6 +885,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim + return type; + } + ++struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) ++ return NULL; ++ type->class = HLSL_CLASS_CONSTANT_BUFFER; ++ type->dimy = 1; ++ type->e.resource.format = format; ++ hlsl_type_calculate_reg_size(ctx, type); ++ list_add_tail(&ctx->types, &type->entry); ++ return type; ++} ++ + static const char * get_case_insensitive_typename(const char *name) + { + static const char *const names[] = +@@ -956,8 +990,13 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_ARRAY: + return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ return hlsl_type_component_count(type->e.resource.format); ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case 
HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: +@@ -1038,10 +1077,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + case HLSL_CLASS_TECHNIQUE: + return t1->e.version == t2->e.version; + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_VERTEX_SHADER: +@@ -1247,6 +1291,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha + list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); + else + list_add_tail(&ctx->globals->vars, &var->scope_entry); ++ var->is_synthetic = true; + } + return var; + } +@@ -1765,7 +1810,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type + } + + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, const struct vkd3d_shader_location *loc) ++ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, ++ unsigned int unroll_limit, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_loop *loop; + +@@ -1774,6 +1820,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); ++ ++ loop->unroll_type = unroll_type; ++ loop->unroll_limit = unroll_limit; + return &loop->node; + } + +@@ -1836,9 +1885,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct + return map->instrs[i].dst; + } + +- /* The block passed to hlsl_clone_block() should have been free of external +- * references. 
*/ +- vkd3d_unreachable(); ++ return src; + } + + static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, +@@ -1935,7 +1982,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ + if (!clone_block(ctx, &body, &src->body, map)) + return NULL; + +- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) ++ if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + { + hlsl_block_cleanup(&body); + return NULL; +@@ -2407,10 +2454,21 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + } + return string; + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ vkd3d_string_buffer_printf(string, "ConstantBuffer"); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } ++ return string; ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: +@@ -2735,6 +2793,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + static const char *const op_names[] = + { + [HLSL_OP0_VOID] = "void", ++ [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", + + [HLSL_OP1_ABS] = "abs", + [HLSL_OP1_BIT_NOT] = "~", +@@ -3086,6 +3145,33 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl + vkd3d_string_buffer_cleanup(&buffer); + } + ++void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) ++{ ++ unsigned int k, component_count = hlsl_type_component_count(var->data_type); ++ struct vkd3d_string_buffer buffer; ++ ++ vkd3d_string_buffer_init(&buffer); ++ if (!var->default_values) ++ { ++ vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); ++ vkd3d_string_buffer_trace(&buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++ return; ++ } ++ ++ vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); ++ for (k = 0; k < component_count; ++k) ++ { ++ if (k % 4 == 0) ++ vkd3d_string_buffer_printf(&buffer, "\n "); ++ vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].value.u); ++ } ++ vkd3d_string_buffer_printf(&buffer, "\n"); ++ ++ vkd3d_string_buffer_trace(&buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++} ++ + void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) + { + struct hlsl_src *src, *next; +@@ -3319,9 +3405,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) + void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) + { + vkd3d_free((void *)semantic->name); ++ vkd3d_free((void *)semantic->raw_name); + memset(semantic, 0, sizeof(*semantic)); + } + ++bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) ++{ ++ *dst = *src; ++ dst->name = dst->raw_name = NULL; ++ if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) ++ return false; ++ if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) ++ { ++ hlsl_cleanup_semantic(dst); ++ return false; ++ } ++ ++ return true; ++} ++ + static void free_function_decl(struct hlsl_ir_function_decl *decl) + { + unsigned int i; +@@ -3712,9 +3814,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + + ctx->builtin_types.Void = 
hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 27814f3a56f..5832958712a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -78,10 +78,12 @@ enum hlsl_type_class + HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, + HLSL_CLASS_STRUCT, + HLSL_CLASS_ARRAY, ++ HLSL_CLASS_DEPTH_STENCIL_STATE, + HLSL_CLASS_DEPTH_STENCIL_VIEW, + HLSL_CLASS_EFFECT_GROUP, + HLSL_CLASS_PASS, + HLSL_CLASS_PIXEL_SHADER, ++ HLSL_CLASS_RASTERIZER_STATE, + HLSL_CLASS_RENDER_TARGET_VIEW, + HLSL_CLASS_SAMPLER, + HLSL_CLASS_STRING, +@@ -89,6 +91,7 @@ enum hlsl_type_class + HLSL_CLASS_TEXTURE, + HLSL_CLASS_UAV, + HLSL_CLASS_VERTEX_SHADER, ++ HLSL_CLASS_CONSTANT_BUFFER, + HLSL_CLASS_VOID, + }; + +@@ -222,6 +225,8 @@ struct hlsl_semantic + const char *name; + uint32_t index; + ++ /* Name exactly as it appears in the sources. */ ++ const char *raw_name; + /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ + bool reported_missing; + /* In case the variable or field that stores this semantic has already reported to use a +@@ -259,8 +264,20 @@ struct hlsl_struct_field + * struct. */ + struct hlsl_reg + { +- /* Index of the first register allocated. */ ++ /* Register number of the first register allocated. */ + uint32_t id; ++ /* For descriptors (buffer, texture, sampler, UAV) this is the base binding ++ * index of the descriptor. ++ * For 5.1 and above descriptors have space and may be arrayed, in which ++ * case the array shares a single register ID but has a range of register ++ * indices, and "id" and "index" are as a rule not equal. ++ * For versions below 5.1, the register number for descriptors is the same ++ * as its external binding index, so only "index" is used, and "id" is ++ * ignored. ++ * For numeric registers "index" is not used. */ ++ uint32_t index; ++ /* Register space of a descriptor. Not used for numeric registers. */ ++ uint32_t space; + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. 
*/ +@@ -371,6 +388,7 @@ struct hlsl_attribute + #define HLSL_STORAGE_LINEAR 0x00010000 + #define HLSL_MODIFIER_SINGLE 0x00020000 + #define HLSL_MODIFIER_EXPORT 0x00040000 ++#define HLSL_STORAGE_ANNOTATION 0x00080000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +@@ -396,6 +414,14 @@ struct hlsl_reg_reservation + unsigned int offset_index; + }; + ++union hlsl_constant_value_component ++{ ++ uint32_t u; ++ int32_t i; ++ float f; ++ double d; ++}; ++ + struct hlsl_ir_var + { + struct hlsl_type *data_type; +@@ -418,6 +444,15 @@ struct hlsl_ir_var + /* Scope that contains annotations for this variable. */ + struct hlsl_scope *annotations; + ++ /* Array of default values the variable was initialized with, one for each component. ++ * Only for variables that need it, such as uniforms and variables inside constant buffers. ++ * This pointer is NULL for others. */ ++ struct hlsl_default_value ++ { ++ /* Default value, in case the component is a numeric value. */ ++ union hlsl_constant_value_component value; ++ } *default_values; ++ + /* A dynamic array containing the state block on the variable's declaration, if any. + * An array variable may contain multiple state blocks. + * A technique pass will always contain one. +@@ -460,6 +495,8 @@ struct hlsl_ir_var + uint32_t is_uniform : 1; + uint32_t is_param : 1; + uint32_t is_separated_resource : 1; ++ uint32_t is_synthetic : 1; ++ uint32_t has_explicit_bind_point : 1; + }; + + /* This struct is used to represent assignments in state block entries: +@@ -470,22 +507,31 @@ struct hlsl_ir_var + * name[lhs_index] = args[0] + * - or - + * name[lhs_index] = {args[0], args[1], ...}; ++ * ++ * This struct also represents function call syntax: ++ * name(args[0], args[1], ...) + */ + struct hlsl_state_block_entry + { +- /* For assignments, the name in the lhs. */ ++ /* Whether this entry is a function call. */ ++ bool is_function_call; ++ ++ /* For assignments, the name in the lhs. ++ * For functions, the name of the function. */ + char *name; + /* Resolved format-specific property identifier. */ + unsigned int name_id; + +- /* Whether the lhs in the assignment is indexed and, in that case, its index. */ ++ /* For assignments, whether the lhs of an assignment is indexed and, in ++ * that case, its index. */ + bool lhs_has_index; + unsigned int lhs_index; + +- /* Instructions present in the rhs. */ ++ /* Instructions present in the rhs or the function arguments. */ + struct hlsl_block *instrs; + +- /* For assignments, arguments of the rhs initializer. */ ++ /* For assignments, arguments of the rhs initializer. ++ * For function calls, the arguments themselves. 
*/ + struct hlsl_src *args; + unsigned int args_count; + }; +@@ -556,12 +602,21 @@ struct hlsl_ir_if + struct hlsl_block else_block; + }; + ++enum hlsl_ir_loop_unroll_type ++{ ++ HLSL_IR_LOOP_UNROLL, ++ HLSL_IR_LOOP_FORCE_UNROLL, ++ HLSL_IR_LOOP_FORCE_LOOP ++}; ++ + struct hlsl_ir_loop + { + struct hlsl_ir_node node; + /* loop condition is stored in the body (as "if (!condition) break;") */ + struct hlsl_block body; + unsigned int next_index; /* liveness index of the end of the loop */ ++ unsigned int unroll_limit; ++ enum hlsl_ir_loop_unroll_type unroll_type; + }; + + struct hlsl_ir_switch_case +@@ -583,6 +638,7 @@ struct hlsl_ir_switch + enum hlsl_ir_expr_op + { + HLSL_OP0_VOID, ++ HLSL_OP0_RASTERIZER_SAMPLE_COUNT, + + HLSL_OP1_ABS, + HLSL_OP1_BIT_NOT, +@@ -775,13 +831,7 @@ struct hlsl_ir_constant + struct hlsl_ir_node node; + struct hlsl_constant_value + { +- union hlsl_constant_value_component +- { +- uint32_t u; +- int32_t i; +- float f; +- double d; +- } u[4]; ++ union hlsl_constant_value_component u[4]; + } value; + /* Constant register of type 'c' where the constant value is stored for SM1. */ + struct hlsl_reg reg; +@@ -811,6 +861,8 @@ struct hlsl_scope + bool loop; + /* The scope was created for the switch statement. */ + bool _switch; ++ /* The scope contains annotation variables. */ ++ bool annotations; + }; + + struct hlsl_profile_info +@@ -1249,6 +1301,10 @@ void hlsl_block_cleanup(struct hlsl_block *block); + bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); + + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); ++void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); ++ ++bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, ++ const struct vkd3d_shader_location *loc); + + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +@@ -1259,7 +1315,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d + bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); + + void hlsl_cleanup_deref(struct hlsl_deref *deref); ++ + void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); ++bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); + + void hlsl_cleanup_ir_switch_cases(struct list *cases); + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); +@@ -1342,7 +1400,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); + struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, const struct vkd3d_shader_location *loc); ++ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, + const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +@@ -1361,6 +1419,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ + unsigned int sample_count); + struct hlsl_type 
*hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct hlsl_type *format, bool rasteriser_ordered); ++struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); + struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, +@@ -1432,10 +1491,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); + D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); ++bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); ++bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); ++ ++void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + + bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, + const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index a5923d8bf8e..55993dac2b4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -49,7 +49,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); + RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum + RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public + RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try +-RESERVED4 typename|union|unsigned|using|virtual ++RESERVED4 typename|union|using|virtual + + WS [ \t] + NEWLINE (\n)|(\r\n) +@@ -164,6 +164,7 @@ textureCUBE {return KW_TEXTURECUBE; } + TextureCubeArray {return KW_TEXTURECUBEARRAY; } + true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } ++unsigned {return KW_UNSIGNED; } + uniform {return KW_UNIFORM; } + vector {return KW_VECTOR; } + VertexShader {return KW_VERTEXSHADER; } +@@ -197,7 +198,9 @@ while {return KW_WHILE; } + struct hlsl_ctx *ctx = yyget_extra(yyscanner); + + yylval->name = hlsl_strdup(ctx, yytext); +- if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) ++ if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) ++ return KW_CONSTANTBUFFER; ++ else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) + return VAR_IDENTIFIER; + else if (hlsl_get_type(ctx->cur_scope, yytext, true, true)) + return TYPE_IDENTIFIER; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y 
b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 9c1bdef926d..7b058a65bc1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -573,12 +573,91 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); + } + ++static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) ++{ ++ union hlsl_constant_value_component ret = {0}; ++ struct hlsl_ir_constant *constant; ++ struct hlsl_ir_node *node; ++ struct hlsl_block expr; ++ struct hlsl_src src; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (node->type) ++ { ++ case HLSL_IR_CONSTANT: ++ case HLSL_IR_EXPR: ++ case HLSL_IR_SWIZZLE: ++ case HLSL_IR_LOAD: ++ case HLSL_IR_INDEX: ++ continue; ++ case HLSL_IR_STORE: ++ if (hlsl_ir_store(node)->lhs.var->is_synthetic) ++ break; ++ /* fall-through */ ++ case HLSL_IR_CALL: ++ case HLSL_IR_IF: ++ case HLSL_IR_LOOP: ++ case HLSL_IR_JUMP: ++ case HLSL_IR_RESOURCE_LOAD: ++ case HLSL_IR_RESOURCE_STORE: ++ case HLSL_IR_SWITCH: ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Expected literal expression."); ++ break; ++ } ++ } ++ ++ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) ++ return ret; ++ hlsl_block_add_block(&expr, block); ++ ++ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) ++ { ++ hlsl_block_cleanup(&expr); ++ return ret; ++ } ++ ++ /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ ++ hlsl_src_from_node(&src, node_from_block(&expr)); ++ hlsl_run_const_passes(ctx, &expr); ++ node = src.node; ++ hlsl_src_remove(&src); ++ ++ if (node->type == HLSL_IR_CONSTANT) ++ { ++ constant = hlsl_ir_constant(node); ++ ret = constant->value.u[0]; ++ } ++ else ++ { ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Failed to evaluate constant expression."); ++ } ++ ++ hlsl_block_cleanup(&expr); ++ ++ return ret; ++} ++ ++static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct vkd3d_shader_location *loc) ++{ ++ union hlsl_constant_value_component res; ++ ++ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); ++ return res.u; ++} ++ + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; ++ unsigned int i, unroll_limit = 0; + struct hlsl_ir_node *loop; +- unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); +@@ -591,18 +670,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct hlsl_attribute *attr = attributes->attrs[i]; + if (!strcmp(attr->name, "unroll")) + { +- if (attr->args_count) ++ if (attr->args_count > 1) + { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); ++ hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, ++ "Ignoring 'unroll' attribute with more than 1 argument."); ++ continue; + } +- else ++ ++ if (attr->args_count == 1) + { +- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); ++ struct hlsl_block expr; ++ hlsl_block_init(&expr); ++ if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) ++ return NULL; ++ ++ unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); ++ hlsl_block_cleanup(&expr); + } ++ ++ unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; + } + else if (!strcmp(attr->name, "loop")) + { +- /* TODO: this attribute will be used to disable unrolling, once it's implememented. 
*/ ++ unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + } + else if (!strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) +@@ -631,7 +721,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + else + list_move_head(&body->instrs, &cond->instrs); + +- if (!(loop = hlsl_new_loop(ctx, body, loc))) ++ if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) + goto oom; + hlsl_block_add_instr(init, loop); + +@@ -1013,6 +1103,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); + } + } ++ ++ if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) ++ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); ++ + vkd3d_free(v->arrays.sizes); + field->loc = v->loc; + field->name = v->name; +@@ -1210,12 +1304,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl + return true; + } + +-static bool parse_reservation_index(const char *string, char *type, uint32_t *index) ++static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, ++ struct hlsl_reg_reservation *reservation) + { +- if (!sscanf(string + 1, "%u", index)) +- return false; ++ char *endptr; ++ ++ reservation->reg_type = ascii_tolower(string[0]); ++ ++ /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. */ ++ if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) ++ { ++ bracket_offset = 0; ++ } ++ ++ if (string[1] == '\0') ++ { ++ reservation->reg_index = bracket_offset; ++ return true; ++ } ++ ++ reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; ++ ++ if (*endptr) ++ { ++ /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, ++ * setting index to -1. It will later fail while validating slot limits. */ ++ if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) ++ { ++ reservation->reg_index = -1; ++ return true; ++ } ++ ++ /* All other types tolerate leftover characters. 
*/ ++ if (endptr == string + 1) ++ return false; ++ } + +- *type = ascii_tolower(string[0]); + return true; + } + +@@ -1286,72 +1410,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * + return block; + } + +-static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, +- const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_constant *constant; +- struct hlsl_ir_node *node; +- struct hlsl_block expr; +- unsigned int ret = 0; +- struct hlsl_src src; +- +- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) +- { +- switch (node->type) +- { +- case HLSL_IR_CONSTANT: +- case HLSL_IR_EXPR: +- case HLSL_IR_SWIZZLE: +- case HLSL_IR_LOAD: +- case HLSL_IR_INDEX: +- continue; +- case HLSL_IR_CALL: +- case HLSL_IR_IF: +- case HLSL_IR_LOOP: +- case HLSL_IR_JUMP: +- case HLSL_IR_RESOURCE_LOAD: +- case HLSL_IR_RESOURCE_STORE: +- case HLSL_IR_STORE: +- case HLSL_IR_SWITCH: +- case HLSL_IR_STATEBLOCK_CONSTANT: +- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Expected literal expression."); +- } +- } +- +- if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) +- return 0; +- hlsl_block_add_block(&expr, block); +- +- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), +- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) +- { +- hlsl_block_cleanup(&expr); +- return 0; +- } +- +- /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ +- hlsl_src_from_node(&src, node_from_block(&expr)); +- hlsl_run_const_passes(ctx, &expr); +- node = src.node; +- hlsl_src_remove(&src); +- +- if (node->type == HLSL_IR_CONSTANT) +- { +- constant = hlsl_ir_constant(node); +- ret = constant->value.u[0].u; +- } +- else +- { +- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Failed to evaluate constant expression."); +- } +- +- hlsl_block_cleanup(&expr); +- +- return ret; +-} +- + static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) + { + /* Scalar vars can be converted to pretty much everything */ +@@ -1862,12 +1920,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned + return true; + } + ++static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) ++{ ++ /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. ++ * components are indexed by their sources. i.e. the first component comes from the first ++ * component of the rhs. */ ++ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; ++ ++ /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ ++ for (i = 0; i < 4; ++i) ++ { ++ if (*writemask & (1 << i)) ++ { ++ unsigned int s = (*swizzle >> (i * 8)) & 0xff; ++ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; ++ unsigned int idx = x + y * 4; ++ new_swizzle |= s << (bit++ * 8); ++ if (new_writemask & (1 << idx)) ++ return false; ++ new_writemask |= 1 << idx; ++ } ++ } ++ width = bit; ++ ++ /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the ++ * incoming vector. 
*/ ++ bit = 0; ++ for (i = 0; i < 16; ++i) ++ { ++ for (j = 0; j < width; ++j) ++ { ++ unsigned int s = (new_swizzle >> (j * 8)) & 0xff; ++ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; ++ unsigned int idx = x + y * 4; ++ if (idx == i) ++ inverted |= j << (bit++ * 2); ++ } ++ } ++ ++ *swizzle = inverted; ++ *writemask = new_writemask; ++ *ret_width = width; ++ return true; ++} ++ + static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) + { + struct hlsl_type *lhs_type = lhs->data_type; + struct hlsl_ir_node *copy; +- unsigned int writemask = 0; ++ unsigned int writemask = 0, width = 0; ++ bool matrix_writemask = false; + + if (assign_op == ASSIGN_OP_SUB) + { +@@ -1885,7 +1988,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + } + + if (hlsl_is_numeric_type(lhs_type)) ++ { + writemask = (1 << lhs_type->dimx) - 1; ++ width = lhs_type->dimx; ++ } + + if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) + return NULL; +@@ -1902,12 +2008,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); + struct hlsl_ir_node *new_swizzle; + uint32_t s = swizzle->swizzle; +- unsigned int width; + +- if (lhs->data_type->class == HLSL_CLASS_MATRIX) +- hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); ++ assert(!matrix_writemask); + +- if (!invert_swizzle(&s, &writemask, &width)) ++ if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) ++ { ++ if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) ++ { ++ hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); ++ return NULL; ++ } ++ if (!invert_swizzle_matrix(&s, &writemask, &width)) ++ { ++ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); ++ return NULL; ++ } ++ matrix_writemask = true; ++ } ++ else if (!invert_swizzle(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); + return NULL; +@@ -1955,7 +2073,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + + dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + +- if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) ++ if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Resource store expressions must write to all components."); + +@@ -1971,12 +2089,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&resource_deref); + } ++ else if (matrix_writemask) ++ { ++ struct hlsl_deref deref; ++ unsigned int i, j, k = 0; ++ ++ hlsl_init_deref_from_index_chain(ctx, &deref, lhs); ++ ++ for (i = 0; i < lhs->data_type->dimy; ++i) ++ { ++ for (j = 0; j < lhs->data_type->dimx; ++j) ++ { ++ struct hlsl_ir_node *load; ++ struct hlsl_block store_block; ++ const unsigned int idx = i * 4 + j; ++ const unsigned int component = i * lhs->data_type->dimx + j; ++ ++ if (!(writemask & (1 << idx))) ++ continue; ++ ++ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) ++ { ++ hlsl_cleanup_deref(&deref); ++ return NULL; ++ } ++ ++ if (!hlsl_new_store_component(ctx, &store_block, &deref, 
component, load)) ++ { ++ hlsl_cleanup_deref(&deref); ++ return NULL; ++ } ++ hlsl_block_add_block(block, &store_block); ++ } ++ } ++ ++ hlsl_cleanup_deref(&deref); ++ } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) + { + struct hlsl_ir_index *row = hlsl_ir_index(lhs); + struct hlsl_ir_node *mat = row->val.node; + unsigned int i, k = 0; + ++ assert(!matrix_writemask); ++ + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *cell, *load, *store, *c; +@@ -2067,6 +2223,53 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d + return true; + } + ++/* For some reason, for matrices, values from default value initializers end up in different ++ * components than from regular initializers. Default value initializers fill the matrix in ++ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order ++ * (top-to-bottom left-to-right), so they have to be adjusted. */ ++static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, ++ struct hlsl_type *type, unsigned int index) ++{ ++ unsigned int element_comp_count, element, x, y, i; ++ unsigned int base = 0; ++ ++ if (ctx->profile->major_version < 4) ++ return index; ++ ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) ++ return index; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_MATRIX: ++ x = index / type->dimy; ++ y = index % type->dimy; ++ return y * type->dimx + x; ++ ++ case HLSL_CLASS_ARRAY: ++ element_comp_count = hlsl_type_component_count(type->e.array.type); ++ element = index / element_comp_count; ++ base = element * element_comp_count; ++ return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); ++ ++ case HLSL_CLASS_STRUCT: ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ struct hlsl_type *field_type = type->e.record.fields[i].type; ++ ++ element_comp_count = hlsl_type_component_count(field_type); ++ if (index - base < element_comp_count) ++ return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); ++ base += element_comp_count; ++ } ++ break; ++ ++ default: ++ return index; ++ } ++ vkd3d_unreachable(); ++} ++ + static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) + { +@@ -2087,12 +2290,29 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + + dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); + +- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) +- return; ++ if (dst->default_values) ++ { ++ struct hlsl_default_value default_value = {0}; ++ unsigned int dst_index; + +- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) +- return; +- hlsl_block_add_block(instrs, &block); ++ if (!hlsl_clone_block(ctx, &block, instrs)) ++ return; ++ default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); ++ ++ dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); ++ dst->default_values[dst_index] = default_value; ++ ++ hlsl_block_cleanup(&block); ++ } ++ else ++ { ++ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) ++ return; ++ ++ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) ++ return; ++ hlsl_block_add_block(instrs, &block); 
++ } + + ++*store_index; + } +@@ -2171,6 +2391,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + struct hlsl_semantic new_semantic; + uint32_t modifiers = v->modifiers; + bool unbounded_res_array = false; ++ bool constant_buffer = false; + struct hlsl_ir_var *var; + struct hlsl_type *type; + bool local = true; +@@ -2190,6 +2411,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + } + ++ if (type->class == HLSL_CLASS_CONSTANT_BUFFER) ++ { ++ type = type->e.resource.format; ++ constant_buffer = true; ++ } ++ + if (unbounded_res_array) + { + if (v->arrays.count == 1) +@@ -2246,17 +2473,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + } + } + ++ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) ++ { ++ /* SM 5.1/6.x descriptor arrays act differently from previous versions. ++ * Not only are they treated as a single object in reflection, but they ++ * act as a single component for the purposes of assignment and ++ * initialization. */ ++ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); ++ } ++ + if (!(var_name = vkd3d_strdup(v->name))) + return; + +- new_semantic = v->semantic; +- if (v->semantic.name) ++ if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) + { +- if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) +- { +- vkd3d_free(var_name); +- return; +- } ++ vkd3d_free(var_name); ++ return; + } + + if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) +@@ -2266,7 +2498,16 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + return; + } + +- var->buffer = ctx->cur_buffer; ++ if (constant_buffer && ctx->cur_scope == ctx->globals) ++ { ++ if (!(var_name = vkd3d_strdup(v->name))) ++ return; ++ var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); ++ } ++ else ++ { ++ var->buffer = ctx->cur_buffer; ++ } + + if (var->buffer == ctx->globals_buffer) + { +@@ -2289,8 +2530,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + +- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) ++ && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ { + check_invalid_object_fields(ctx, var); ++ } + + if ((func = hlsl_get_first_func_decl(ctx, var->name))) + { +@@ -2348,6 +2592,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + { + struct parse_variable_def *v, *v_next; + struct hlsl_block *initializers; ++ unsigned int component_count; + struct hlsl_ir_var *var; + struct hlsl_type *type; + +@@ -2371,6 +2616,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + } + + type = var->data_type; ++ component_count = hlsl_type_component_count(type); + + var->state_blocks = v->state_blocks; + var->state_block_count = v->state_block_count; +@@ -2379,51 +2625,78 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + v->state_block_capacity = 0; + v->state_blocks = NULL; + +- if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) ++ if (var->state_blocks && 
component_count != var->state_block_count) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u state blocks, but got %u.", +- hlsl_type_component_count(type), var->state_block_count); ++ "Expected %u state blocks, but got %u.", component_count, var->state_block_count); + free_parse_variable_def(v); + continue; + } + + if (v->initializer.args_count) + { +- if (v->initializer.braces) ++ unsigned int store_index = 0; ++ bool is_default_values_initializer; ++ unsigned int size, k; ++ ++ is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) ++ || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) ++ || ctx->cur_scope->annotations; ++ ++ if (is_default_values_initializer) + { +- unsigned int size = initializer_size(&v->initializer); +- unsigned int store_index = 0; +- unsigned int k; ++ /* Default values might have been allocated already for another variable of the same name, ++ in the same scope. */ ++ if (var->default_values) ++ { ++ free_parse_variable_def(v); ++ continue; ++ } + +- if (hlsl_type_component_count(type) != size) ++ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) + { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u components in initializer, but got %u.", +- hlsl_type_component_count(type), size); + free_parse_variable_def(v); + continue; + } ++ } + +- for (k = 0; k < v->initializer.args_count; ++k) ++ if (!v->initializer.braces) ++ { ++ if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) + { +- initialize_var_components(ctx, v->initializer.instrs, var, +- &store_index, v->initializer.args[k]); ++ free_parse_variable_def(v); ++ continue; + } ++ ++ v->initializer.args[0] = node_from_block(v->initializer.instrs); + } +- else ++ ++ size = initializer_size(&v->initializer); ++ if (component_count != size) + { +- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u components in initializer, but got %u.", component_count, size); ++ free_parse_variable_def(v); ++ continue; ++ } + +- assert(v->initializer.args_count == 1); +- hlsl_block_add_instr(v->initializer.instrs, &load->node); +- add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); ++ for (k = 0; k < v->initializer.args_count; ++k) ++ { ++ initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); + } + +- if (var->storage_modifiers & HLSL_STORAGE_STATIC) ++ if (is_default_values_initializer) ++ { ++ hlsl_dump_var_default_values(var); ++ } ++ else if (var->storage_modifiers & HLSL_STORAGE_STATIC) ++ { + hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); ++ } + else ++ { + hlsl_block_add_block(initializers, v->initializer.instrs); ++ } + } + else if (var->storage_modifiers & HLSL_STORAGE_STATIC) + { +@@ -3353,6 +3626,34 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); + } + ++static bool intrinsic_faceforward(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s faceforward(%s n, %s i, %s ng)\n" ++ "{\n" ++ " return dot(i, ng) < 0 ? 
n : -n;\n" ++ "}\n"; ++ ++ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) ++ return false; ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name, type->name))) ++ return false; ++ func = hlsl_compile_internal_function(ctx, "faceforward", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_floor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4032,6 +4333,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, + static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) + { ++ unsigned int sampler_dim = hlsl_sampler_dim_count(dim); + struct hlsl_resource_load_params load_params = { 0 }; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *coords, *sample; +@@ -4043,11 +4345,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + } + +- if (params->args_count == 4) +- { +- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); +- } +- + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_SAMPLER + || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) +@@ -4061,18 +4358,22 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + hlsl_release_string_buffer(ctx, string); + } + +- if (!strcmp(name, "tex2Dlod")) ++ if (!strcmp(name, "tex2Dbias") ++ || !strcmp(name, "tex2Dlod")) + { + struct hlsl_ir_node *lod, *c; + +- load_params.type = HLSL_RESOURCE_SAMPLE_LOD; ++ if (!strcmp(name, "tex2Dlod")) ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; ++ else ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; + +- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) ++ if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) + return false; + hlsl_block_add_instr(params->instrs, c); + +- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, +- hlsl_sampler_dim_count(dim)), loc))) ++ if (!(coords = add_implicit_conversion(ctx, params->instrs, c, ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } +@@ -4099,14 +4400,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + + if (hlsl_version_ge(ctx, 4, 0)) + { +- unsigned int count = hlsl_sampler_dim_count(dim); + struct hlsl_ir_node *divisor; + +- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) ++ if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) + return false; + hlsl_block_add_instr(params->instrs, divisor); + +- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) ++ if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) + return false; + hlsl_block_add_instr(params->instrs, coords); + +@@ -4120,12 +4420,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; + } + } ++ else if (params->args_count == 4) /* Gradient sampling. 
*/ ++ { ++ if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ { ++ return false; ++ } ++ ++ if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ { ++ return false; ++ } ++ ++ if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ { ++ return false; ++ } ++ ++ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; ++ } + else + { + load_params.type = HLSL_RESOURCE_SAMPLE; + + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } +@@ -4181,12 +4503,30 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); + } + ++static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); ++} ++ + static bool intrinsic_tex2D(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); + } + ++static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); ++} ++ ++static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); ++} ++ + static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4205,6 +4545,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); + } + ++static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); ++} ++ + static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4217,6 +4563,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); + } + ++static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); ++} ++ + static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4336,6 +4688,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + return true; + } + ++static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *expr; ++ ++ if (!(expr = 
hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, ++ operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, expr); ++ ++ return true; ++} ++ + static const struct intrinsic_function + { + const char *name; +@@ -4348,6 +4714,7 @@ intrinsic_functions[] = + { + /* Note: these entries should be kept in alphabetical order. */ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, ++ {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, + {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, + {"all", 1, true, intrinsic_all}, +@@ -4375,6 +4742,7 @@ intrinsic_functions[] = + {"dot", 2, true, intrinsic_dot}, + {"exp", 1, true, intrinsic_exp}, + {"exp2", 1, true, intrinsic_exp2}, ++ {"faceforward", 3, true, intrinsic_faceforward}, + {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, + {"frac", 1, true, intrinsic_frac}, +@@ -4406,12 +4774,17 @@ intrinsic_functions[] = + {"tan", 1, true, intrinsic_tan}, + {"tanh", 1, true, intrinsic_tanh}, + {"tex1D", -1, false, intrinsic_tex1D}, ++ {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, + {"tex2D", -1, false, intrinsic_tex2D}, ++ {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, ++ {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, + {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, + {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, + {"tex3D", -1, false, intrinsic_tex3D}, ++ {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, + {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, + {"texCUBE", -1, false, intrinsic_texCUBE}, ++ {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, + {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, + {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, +@@ -5481,6 +5854,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_BREAK + %token KW_BUFFER + %token KW_CASE ++%token KW_CONSTANTBUFFER + %token KW_CBUFFER + %token KW_CENTROID + %token KW_COLUMN_MAJOR +@@ -5566,6 +5940,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_TEXTURECUBEARRAY + %token KW_TRUE + %token KW_TYPEDEF ++%token KW_UNSIGNED + %token KW_UNIFORM + %token KW_VECTOR + %token KW_VERTEXSHADER +@@ -5670,6 +6045,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + + %type if_body + ++%type array ++ + %type var_modifiers + + %type any_identifier +@@ -5717,8 +6094,7 @@ hlsl_prog: + | hlsl_prog buffer_declaration buffer_body + | hlsl_prog declaration_statement + { +- if (!list_empty(&$2->instrs)) +- hlsl_fixme(ctx, &@2, "Uniform initializer."); ++ hlsl_block_add_block(&ctx->static_initializers, $2); + destroy_block($2); + } + | hlsl_prog preproc_directive +@@ -5742,19 +6118,31 @@ pass: + + annotations_list: + variables_def_typed ';' ++ { ++ struct hlsl_block *block; ++ ++ block = initialize_vars(ctx, $1); ++ destroy_block(block); ++ } + | annotations_list variables_def_typed ';' ++ { ++ struct hlsl_block *block; ++ ++ block = initialize_vars(ctx, $2); ++ destroy_block(block); ++ } + + annotations_opt: + %empty + { + $$ = NULL; + } +- | '<' scope_start '>' ++ | '<' annotations_scope_start '>' + { + hlsl_pop_scope(ctx); + $$ = NULL; + } +- | '<' scope_start annotations_list '>' ++ | '<' annotations_scope_start annotations_list '>' + { + struct hlsl_scope *scope = ctx->cur_scope; + +@@ -6282,6 +6670,13 @@ switch_scope_start: + ctx->cur_scope->_switch = true; + } + ++annotations_scope_start: ++ %empty ++ { ++ 
hlsl_push_scope(ctx); ++ ctx->cur_scope->annotations = true; ++ } ++ + var_identifier: + VAR_IDENTIFIER + | NEW_IDENTIFIER +@@ -6315,6 +6710,9 @@ semantic: + { + char *p; + ++ if (!($$.raw_name = hlsl_strdup(ctx, $2))) ++ YYABORT; ++ + for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) + ; + $$.name = $2; +@@ -6330,22 +6728,34 @@ register_reservation: + ':' KW_REGISTER '(' any_identifier ')' + { + memset(&$$, 0, sizeof($$)); +- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ if (!parse_reservation_index(ctx, $4, 0, &$$)) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ ++ vkd3d_free($4); ++ } ++ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' ++ { ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) ++ { + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); ++ } + + vkd3d_free($4); ++ vkd3d_free($6); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' + { + memset(&$$, 0, sizeof($$)); +- if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ if (parse_reservation_index(ctx, $6, 0, &$$)) + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + } + else if (parse_reservation_space($6, &$$.reg_space)) + { +- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ if (!parse_reservation_index(ctx, $4, 0, &$$)) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + } +@@ -6358,12 +6768,45 @@ register_reservation: + vkd3d_free($4); + vkd3d_free($6); + } ++ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' ++ { ++ memset(&$$, 0, sizeof($$)); ++ ++ if (!parse_reservation_space($9, &$$.reg_space)) ++ hlsl_error(ctx, &@9, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register space reservation '%s'.", $9); ++ ++ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) ++ { ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ } ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ vkd3d_free($9); ++ } ++ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) ++ { ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $6); ++ } ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ vkd3d_free($8); ++ } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + + memset(&$$, 0, sizeof($$)); +- if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ if (!parse_reservation_index(ctx, $6, 0, &$$)) + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + +@@ -6375,6 +6818,26 @@ register_reservation: + vkd3d_free($6); + vkd3d_free($8); + } ++ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) ++ { ++ 
hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $6); ++ } ++ ++ if (!parse_reservation_space($11, &$$.reg_space)) ++ hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register space reservation '%s'.", $11); ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ vkd3d_free($8); ++ vkd3d_free($11); ++ } + + packoffset_reservation: + ':' KW_PACKOFFSET '(' any_identifier ')' +@@ -6449,8 +6912,13 @@ parameter: + } + type = hlsl_new_array_type(ctx, type, $4.sizes[i]); + } ++ vkd3d_free($4.sizes); ++ + $$.type = type; + ++ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) ++ hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); ++ + $$.name = $3; + $$.semantic = $5.semantic; + $$.reg_reservation = $5.reg_reservation; +@@ -6713,6 +7181,26 @@ type_no_void: + } + vkd3d_free($1); + } ++ | KW_UNSIGNED TYPE_IDENTIFIER ++ { ++ struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); ++ ++ if (hlsl_is_numeric_type(type) && type->e.numeric.type == HLSL_TYPE_INT) ++ { ++ if (!(type = hlsl_type_clone(ctx, type, 0, 0))) ++ YYABORT; ++ vkd3d_free((void *)type->name); ++ type->name = NULL; ++ type->e.numeric.type = HLSL_TYPE_UINT; ++ } ++ else ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "The 'unsigned' keyword can't be used with type %s.", $2); ++ } ++ ++ $$ = type; ++ } + | KW_STRUCT TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); +@@ -6724,6 +7212,10 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); + } ++ | KW_DEPTHSTENCILSTATE ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); ++ } + | KW_DEPTHSTENCILVIEW + { + $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); +@@ -6736,6 +7228,17 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); + } ++ | KW_CONSTANTBUFFER '<' type '>' ++ { ++ if ($3->class != HLSL_CLASS_STRUCT) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "ConstantBuffer<...> requires user-defined structure type."); ++ $$ = hlsl_new_cb_type(ctx, $3); ++ } ++ | KW_RASTERIZERSTATE ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); ++ } + + type: + type_no_void +@@ -6932,6 +7435,34 @@ state_block: + hlsl_src_from_node(&entry->args[i], $5.args[i]); + vkd3d_free($5.args); + ++ $$ = $1; ++ state_block_add_entry($$, entry); ++ } ++ | state_block any_identifier '(' func_arguments ')' ';' ++ { ++ struct hlsl_state_block_entry *entry; ++ unsigned int i; ++ ++ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) ++ YYABORT; ++ ++ entry->is_function_call = true; ++ ++ entry->name = $2; ++ entry->lhs_has_index = false; ++ entry->lhs_index = 0; ++ ++ entry->instrs = $4.instrs; ++ ++ entry->args_count = $4.args_count; ++ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) ++ YYABORT; ++ for (i = 0; i < entry->args_count; ++i) ++ hlsl_src_from_node(&entry->args[i], $4.args[i]); ++ vkd3d_free($4.args); ++ ++ hlsl_validate_state_block_entry(ctx, entry, &@4); ++ + $$ = $1; + state_block_add_entry($$, entry); + } +@@ -7020,52 +7551,43 @@ variable_def_typed: + $$->modifiers_loc = @1; + } + +-arrays: +- %empty ++array: ++ '[' ']' + { +- $$.sizes = NULL; +- $$.count = 0; ++ $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; + } +- | '[' expr ']' arrays ++ | '[' expr ']' + { +- uint32_t *new_array; +- unsigned int size; +- +- size = 
evaluate_static_expression_as_uint(ctx, $2, &@2); +- +- destroy_block($2); ++ $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); + +- $$ = $4; +- +- if (!size) ++ if (!$$) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Array size is not a positive integer constant."); +- vkd3d_free($$.sizes); + YYABORT; + } + +- if (size > 65536) ++ if ($$ > 65536) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, +- "Array size %u is not between 1 and 65536.", size); +- vkd3d_free($$.sizes); ++ "Array size %u is not between 1 and 65536.", $$); + YYABORT; + } + +- if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) +- { +- vkd3d_free($$.sizes); +- YYABORT; +- } +- $$.sizes = new_array; +- $$.sizes[$$.count++] = size; ++ destroy_block($2); + } +- | '[' ']' arrays ++ ++arrays: ++ %empty ++ { ++ $$.sizes = NULL; ++ $$.count = 0; ++ } ++ | array arrays + { + uint32_t *new_array; + +- $$ = $3; ++ $$ = $2; + + if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) + { +@@ -7074,7 +7596,7 @@ arrays: + } + + $$.sizes = new_array; +- $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; ++ $$.sizes[$$.count++] = $1; + } + + var_modifiers: +@@ -7156,6 +7678,8 @@ var_modifiers: + } + | var_identifier var_modifiers + { ++ $$ = $2; ++ + if (!strcmp($1, "precise")) + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); + else if (!strcmp($1, "single")) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index bdb72a1fab9..7e4f168675e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -218,6 +218,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, + uniform->is_uniform = 1; + uniform->is_param = temp->is_param; + uniform->buffer = temp->buffer; ++ if (temp->default_values) ++ { ++ /* Transfer default values from the temp to the uniform. */ ++ assert(!uniform->default_values); ++ assert(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); ++ uniform->default_values = temp->default_values; ++ temp->default_values = NULL; ++ } + + if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) + return; +@@ -312,7 +320,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + } + } + +- if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) ++ if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) + { + vkd3d_free(new_name); + return NULL; +@@ -1623,9 +1631,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: +@@ -1635,6 +1645,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_ARRAY: + case HLSL_CLASS_STRUCT: ++ case HLSL_CLASS_CONSTANT_BUFFER: + /* FIXME: Actually we shouldn't even get here, but we don't split + * matrices yet. 
*/ + return false; +@@ -1970,6 +1981,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return progress; + } + ++enum validation_result ++{ ++ DEREF_VALIDATION_OK, ++ DEREF_VALIDATION_OUT_OF_BOUNDS, ++ DEREF_VALIDATION_NOT_CONSTANT, ++}; ++ ++static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, ++ const struct hlsl_deref *deref) ++{ ++ struct hlsl_type *type = deref->var->data_type; ++ unsigned int i; ++ ++ for (i = 0; i < deref->path_len; ++i) ++ { ++ struct hlsl_ir_node *path_node = deref->path[i].node; ++ unsigned int idx = 0; ++ ++ assert(path_node); ++ if (path_node->type != HLSL_IR_CONSTANT) ++ return DEREF_VALIDATION_NOT_CONSTANT; ++ ++ /* We should always have generated a cast to UINT. */ ++ assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_VECTOR: ++ if (idx >= type->dimx) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Vector index is out of bounds. %u/%u", idx, type->dimx); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_MATRIX: ++ if (idx >= hlsl_type_major_size(type)) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); ++ } ++ ++ return DEREF_VALIDATION_OK; ++} ++ + static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const char *usage) + { +@@ -1987,60 +2068,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct + } + } + +-static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + void *context) + { +- unsigned int start, count; +- +- if (instr->type == HLSL_IR_RESOURCE_LOAD) ++ switch (instr->type) + { +- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); +- +- if (!load->resource.var->is_uniform) ++ case HLSL_IR_RESOURCE_LOAD: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Loaded resource must have a single uniform source."); ++ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); ++ ++ if (!load->resource.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Loaded resource must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Loaded resource from \"%s\" must be determinable at compile time.", ++ load->resource.var->name); ++ note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); ++ } ++ ++ if (load->sampler.var) ++ 
{ ++ if (!load->sampler.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Resource load sampler must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Resource load sampler from \"%s\" must be determinable at compile time.", ++ load->sampler.var->name); ++ note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); ++ } ++ } ++ break; + } +- else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) ++ case HLSL_IR_RESOURCE_STORE: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Loaded resource from \"%s\" must be determinable at compile time.", +- load->resource.var->name); +- note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); +- } ++ struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + +- if (load->sampler.var) +- { +- if (!load->sampler.var->is_uniform) ++ if (!store->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Resource load sampler must have a single uniform source."); ++ "Accessed resource must have a single uniform source."); + } +- else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) ++ else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Resource load sampler from \"%s\" must be determinable at compile time.", +- load->sampler.var->name); +- note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); ++ "Accessed resource from \"%s\" must be determinable at compile time.", ++ store->resource.var->name); ++ note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); + } ++ break; + } +- } +- else if (instr->type == HLSL_IR_RESOURCE_STORE) +- { +- struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); +- +- if (!store->resource.var->is_uniform) ++ case HLSL_IR_LOAD: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Accessed resource must have a single uniform source."); ++ struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ validate_component_index_range_from_deref(ctx, &load->src); ++ break; + } +- else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) ++ case HLSL_IR_STORE: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Accessed resource from \"%s\" must be determinable at compile time.", +- store->resource.var->name); +- note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); ++ struct hlsl_ir_store *store = hlsl_ir_store(instr); ++ validate_component_index_range_from_deref(ctx, &store->lhs); ++ break; + } ++ default: ++ break; + } + + return false; +@@ -2554,11 +2652,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + case HLSL_RESOURCE_RESINFO: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: +- case HLSL_RESOURCE_SAMPLE_GRAD: + case HLSL_RESOURCE_SAMPLE_INFO: + return false; + + case HLSL_RESOURCE_SAMPLE: ++ case HLSL_RESOURCE_SAMPLE_GRAD: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case 
HLSL_RESOURCE_SAMPLE_PROJ: +@@ -3815,15 +3913,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { ++ const struct hlsl_reg_reservation *reservation = &var->reg_reservation; + unsigned int r; + +- if (var->reg_reservation.reg_type) ++ if (reservation->reg_type) + { + for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) + { + if (var->regs[r].allocation_size > 0) + { +- if (var->reg_reservation.reg_type != get_regset_name(r)) ++ if (reservation->reg_type != get_regset_name(r)) + { + struct vkd3d_string_buffer *type_string; + +@@ -3839,10 +3938,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + else + { + var->regs[r].allocated = true; +- var->regs[r].id = var->reg_reservation.reg_index; +- TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, +- var->reg_reservation.reg_index, var->reg_reservation.reg_type, +- var->reg_reservation.reg_index + var->regs[r].allocation_size); ++ var->regs[r].space = reservation->reg_space; ++ var->regs[r].index = reservation->reg_index; + } + } + } +@@ -4181,8 +4278,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + { + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + ++ /* FIXME: We could potentially pack structs or arrays more efficiently... */ ++ + if (type->class <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); ++ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); + else + return allocate_range(ctx, allocator, first_write, last_read, reg_size); + } +@@ -4589,6 +4688,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) + + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ struct register_allocator allocator_used = {0}; + struct register_allocator allocator = {0}; + struct hlsl_ir_var *var; + +@@ -4597,6 +4697,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; + + if (!var->is_uniform || reg_size == 0) + continue; +@@ -4609,12 +4710,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + assert(reg_size % 4 == 0); + for (i = 0; i < reg_size / 4; ++i) + { +- if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) ++ if (i < bind_count) + { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, +- "Overlapping register() reservations on 'c%u'.", reg_idx + i); ++ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Overlapping register() reservations on 'c%u'.", reg_idx + i); ++ } ++ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); + } +- + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); + } + +@@ -4627,6 +4731,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + } + } + ++ vkd3d_free(allocator_used.allocations); ++ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + 
unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; +@@ -4697,7 +4803,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if (ctx->profile->major_version < 4) + { +- D3DSHADER_PARAM_REGISTER_TYPE sm1_type; ++ struct vkd3d_shader_version version; + D3DDECLUSAGE usage; + uint32_t usage_idx; + +@@ -4705,8 +4811,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + return; + +- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, ®); +- if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ builtin = hlsl_sm1_register_from_semantic(&version, ++ var->semantic.name, var->semantic.index, output, &type, ®); ++ if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -4715,7 +4825,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; +- type = (enum vkd3d_shader_register_type)sm1_type; + } + else + { +@@ -4762,13 +4871,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) + } + } + +-static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) ++static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) + { + const struct hlsl_buffer *buffer; + + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) + { +- if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) ++ if (buffer->reservation.reg_type == 'b' ++ && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) + return buffer; + } + return NULL; +@@ -4783,6 +4893,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va + if (register_reservation) + { + var->buffer_offset = 4 * var->reg_reservation.reg_index; ++ var->has_explicit_bind_point = 1; + } + else + { +@@ -4815,6 +4926,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va + } + } + var->buffer_offset = var->reg_reservation.offset_index; ++ var->has_explicit_bind_point = 1; + } + else + { +@@ -4913,11 +5025,19 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) + } + } + ++static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) ++{ ++ if (hlsl_version_ge(ctx, 5, 1)) ++ return UINT_MAX; ++ ++ return 13; ++} ++ + static void allocate_buffers(struct hlsl_ctx *ctx) + { + struct hlsl_buffer *buffer; ++ uint32_t index = 0, id = 0; + struct hlsl_ir_var *var; +- uint32_t index = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +@@ -4938,32 +5058,59 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + + if (buffer->type == HLSL_BUFFER_CONSTANT) + { +- if (buffer->reservation.reg_type == 'b') ++ const struct hlsl_reg_reservation *reservation = &buffer->reservation; ++ ++ if (reservation->reg_type == 'b') + { +- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); ++ const struct 
hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, ++ reservation->reg_space, reservation->reg_index); ++ unsigned int max_index = get_max_cbuffer_reg_index(ctx); ++ ++ if (buffer->reservation.reg_index > max_index) ++ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Buffer reservation cb%u exceeds target's maximum (cb%u).", ++ buffer->reservation.reg_index, max_index); + + if (reserved_buffer && reserved_buffer != buffer) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +- "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); ++ "Multiple buffers bound to space %u, index %u.", ++ reservation->reg_space, reservation->reg_index); + hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, +- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); ++ "Buffer %s is already bound to space %u, index %u.", ++ reserved_buffer->name, reservation->reg_space, reservation->reg_index); + } + +- buffer->reg.id = buffer->reservation.reg_index; ++ buffer->reg.space = reservation->reg_space; ++ buffer->reg.index = reservation->reg_index; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ buffer->reg.id = id++; ++ else ++ buffer->reg.id = buffer->reg.index; + buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; +- TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); ++ TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", ++ buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); + } +- else if (!buffer->reservation.reg_type) ++ else if (!reservation->reg_type) + { +- while (get_reserved_buffer(ctx, index)) ++ unsigned int max_index = get_max_cbuffer_reg_index(ctx); ++ while (get_reserved_buffer(ctx, 0, index)) + ++index; + +- buffer->reg.id = index; ++ if (index > max_index) ++ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Too many buffers allocated, target's maximum is %u.", max_index); ++ ++ buffer->reg.space = 0; ++ buffer->reg.index = index; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ buffer->reg.id = id++; ++ else ++ buffer->reg.id = buffer->reg.index; + buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; +- TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++ TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); + ++index; + } + else +@@ -4980,7 +5127,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + } + + static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, +- uint32_t index, bool allocated_only) ++ uint32_t space, uint32_t index, bool allocated_only) + { + const struct hlsl_ir_var *var; + unsigned int start, count; +@@ -4995,12 +5142,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; + ++ if (var->reg_reservation.reg_space != space) ++ continue; ++ + if (!var->regs[regset].allocated && allocated_only) + continue; + } + else if (var->regs[regset].allocated) + { +- start = var->regs[regset].id; ++ if (var->regs[regset].space != space) ++ continue; ++ ++ start = var->regs[regset].index; + count = var->regs[regset].allocation_size; + } + else +@@ -5017,8 +5170,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + { + char regset_name = get_regset_name(regset); ++ 
uint32_t min_index = 0, id = 0; + struct hlsl_ir_var *var; +- uint32_t min_index = 0; + + if (regset == HLSL_REGSET_UAVS) + { +@@ -5041,35 +5194,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + if (var->regs[regset].allocated) + { + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; +- unsigned int index, i; ++ unsigned int i; + +- if (var->regs[regset].id < min_index) ++ if (var->regs[regset].index < min_index) + { + assert(regset == HLSL_REGSET_UAVS); + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "UAV index (%u) must be higher than the maximum render target index (%u).", +- var->regs[regset].id, min_index - 1); ++ var->regs[regset].index, min_index - 1); + continue; + } + + for (i = 0; i < count; ++i) + { +- index = var->regs[regset].id + i; ++ unsigned int space = var->regs[regset].space; ++ unsigned int index = var->regs[regset].index + i; + + /* get_allocated_object() may return "var" itself, but we + * actually want that, otherwise we'll end up reporting the + * same conflict between the same two variables twice. */ +- reserved_object = get_allocated_object(ctx, regset, index, true); ++ reserved_object = get_allocated_object(ctx, regset, space, index, true); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +- "Multiple variables bound to %c%u.", regset_name, index); ++ "Multiple variables bound to space %u, %c%u.", regset_name, space, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, +- "Variable '%s' is already bound to %c%u.", reserved_object->name, +- regset_name, index); ++ "Variable '%s' is already bound to space %u, %c%u.", ++ reserved_object->name, regset_name, space, index); + last_reported = reserved_object; + } + } ++ ++ if (hlsl_version_ge(ctx, 5, 1)) ++ var->regs[regset].id = id++; ++ else ++ var->regs[regset].id = var->regs[regset].index; ++ TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", ++ var->name, var->regs[regset].space, regset_name, var->regs[regset].index, ++ regset_name, var->regs[regset].index + count, var->regs[regset].id); + } + else + { +@@ -5078,7 +5240,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + + while (available < count) + { +- if (get_allocated_object(ctx, regset, index, false)) ++ if (get_allocated_object(ctx, regset, 0, index, false)) + available = 0; + else + ++available; +@@ -5086,10 +5248,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + } + index -= count; + +- var->regs[regset].id = index; ++ var->regs[regset].space = 0; ++ var->regs[regset].index = index; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ var->regs[regset].id = id++; ++ else ++ var->regs[regset].id = var->regs[regset].index; + var->regs[regset].allocated = true; +- TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, +- index + count); ++ TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, ++ regset_name, index, regset_name, index + count, var->regs[regset].id); + ++index; + } + } +@@ -5123,21 +5290,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + { + case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Vector index is out of bounds. 
%u/%u", idx, type->dimx); + return false; +- } + *start += idx; + break; + + case HLSL_CLASS_MATRIX: + if (idx >= hlsl_type_major_size(type)) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); + return false; +- } + if (hlsl_type_is_row_major(type)) + *start += idx * type->dimx; + else +@@ -5146,11 +5305,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); + return false; +- } + *start += idx * hlsl_type_component_count(type->e.array.type); + break; + +@@ -5295,6 +5450,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + assert(deref->data_type); + assert(hlsl_is_numeric_type(deref->data_type)); + ++ ret.index += offset / 4; + ret.id += offset / 4; + + ret.writemask = 0xf & (0xf << (offset % 4)); +@@ -5446,6 +5602,330 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + } while (progress); + } + ++static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, ++ struct vsir_program *program, bool output, struct hlsl_ir_var *var) ++{ ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ enum vkd3d_shader_register_type type; ++ struct shader_signature *signature; ++ struct signature_element *element; ++ unsigned int register_index, mask; ++ ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ ++ if (output) ++ signature = &program->output_signature; ++ else ++ signature = &program->input_signature; ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element = &signature->elements[signature->element_count++]; ++ ++ if (!hlsl_sm1_register_from_semantic(&program->shader_version, ++ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ { ++ unsigned int usage, usage_idx; ++ bool ret; ++ ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ ++ ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); ++ assert(ret); ++ /* With the exception of vertex POSITION output, none of these are ++ * system values. Pixel POSITION input is not equivalent to ++ * SV_Position; the closer equivalent is VPOS, which is not declared ++ * as a semantic. 
*/ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ } ++ mask = (1 << var->data_type->dimx) - 1; ++ ++ memset(element, 0, sizeof(*element)); ++ if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) ++ { ++ --signature->element_count; ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element->semantic_index = var->semantic.index; ++ element->sysval_semantic = sysval; ++ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->register_index = register_index; ++ element->target_location = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = mask; ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) ++ element->interpolation_mode = VKD3DSIM_LINEAR; ++} ++ ++static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_input_semantic) ++ sm1_generate_vsir_signature_entry(ctx, program, false, var); ++ if (var->is_output_semantic) ++ sm1_generate_vsir_signature_entry(ctx, program, true, var); ++ } ++} ++ ++/* OBJECTIVE: Translate all the information from ctx and entry_func to the ++ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() ++ * without relying on ctx and entry_func. */ ++static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) ++{ ++ struct vkd3d_shader_version version = {0}; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ if (!vsir_program_init(program, &version, 0)) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ write_sm1_uniforms(ctx, &buffer); ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ return; ++ } ++ ctab->code = buffer.data; ++ ctab->size = buffer.size; ++ ++ sm1_generate_vsir_signature(ctx, program); ++} ++ ++static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, ++ struct hlsl_block **found_block) ++{ ++ struct hlsl_ir_node *node; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (node == stop_point) ++ return NULL; ++ ++ if (node->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(node); ++ struct hlsl_ir_jump *jump = NULL; ++ ++ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) ++ return jump; ++ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) ++ return jump; ++ } ++ else if (node->type == HLSL_IR_JUMP) ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ ++ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) ++ { ++ *found_block = block; ++ return jump; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) ++{ ++ /* Always use the explicit limit if it has been passed. */ ++ if (loop->unroll_limit) ++ return loop->unroll_limit; ++ ++ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. 
*/ ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ return 1024; ++ ++ /* SM4 limits implicit unrolling to 254 iterations. */ ++ if (hlsl_version_ge(ctx, 4, 0)) ++ return 254; ++ ++ /* SM<3 implicitly unrolls up to 1024 iterations. */ ++ return 1024; ++} ++ ++static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) ++{ ++ unsigned int max_iterations, i; ++ ++ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); ++ ++ for (i = 0; i < max_iterations; ++i) ++ { ++ struct hlsl_block tmp_dst, *jump_block; ++ struct hlsl_ir_jump *jump = NULL; ++ ++ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) ++ return false; ++ list_move_before(&loop->node.entry, &tmp_dst.instrs); ++ hlsl_block_cleanup(&tmp_dst); ++ ++ hlsl_run_const_passes(ctx, block); ++ ++ if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) ++ { ++ enum hlsl_ir_jump_type type = jump->type; ++ ++ if (jump_block != loop_parent) ++ { ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, ++ "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); ++ return false; ++ } ++ ++ list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); ++ hlsl_block_cleanup(&tmp_dst); ++ ++ if (type == HLSL_IR_JUMP_BREAK) ++ break; ++ } ++ } ++ ++ /* Native will not emit an error if max_iterations has been reached with an ++ * explicit limit. It also will not insert a loop if there are iterations left ++ * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ ++ if (!loop->unroll_limit && i == max_iterations) ++ { ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, ++ "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); ++ return false; ++ } ++ ++ list_remove(&loop->node.entry); ++ hlsl_free_instr(&loop->node); ++ ++ return true; ++} ++ ++/* ++ * loop_unrolling_find_unrollable_loop() is not the normal way to do things; ++ * normal passes simply iterate over the whole block and apply a transformation ++ * to every relevant instruction. However, loop unrolling can fail, and we want ++ * to leave the loop in its previous state in that case. That isn't a problem by ++ * itself, except that loop unrolling needs copy-prop in order to work properly, ++ * and copy-prop state at the time of the loop depends on the rest of the program ++ * up to that point. This means we need to clone the whole program, and at that ++ * point we have to search it again anyway to find the clone of the loop we were ++ * going to unroll. ++ * ++ * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop ++ * up until the loop instruction, clone just that loop, then use copyprop again ++ * with the saved state after unrolling. However, copyprop currently isn't built ++ * for that yet [notably, it still relies on indices]. Note also this still doesn't ++ * really let us use transform_ir() anyway [since we don't have a good way to say ++ * "copyprop from the beginning of the program up to the instruction we're ++ * currently processing" from the callback]; we'd have to use a dedicated ++ * recursive function instead. 
*/ ++static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_block **containing_block) ++{ ++ struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (instr->type) ++ { ++ case HLSL_IR_LOOP: ++ { ++ struct hlsl_ir_loop *nested_loop; ++ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); ++ ++ if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) ++ return nested_loop; ++ ++ if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ { ++ *containing_block = block; ++ return loop; ++ } ++ ++ break; ++ } ++ case HLSL_IR_IF: ++ { ++ struct hlsl_ir_loop *loop; ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) ++ return loop; ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) ++ return loop; ++ ++ break; ++ } ++ case HLSL_IR_SWITCH: ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ struct hlsl_ir_loop *loop; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ { ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) ++ return loop; ++ } ++ ++ break; ++ } ++ default: ++ break; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ while (true) ++ { ++ struct hlsl_block clone, *containing_block; ++ struct hlsl_ir_loop *loop, *cloned_loop; ++ ++ if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) ++ return; ++ ++ if (!hlsl_clone_block(ctx, &clone, block)) ++ return; ++ ++ cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); ++ assert(cloned_loop); ++ ++ if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) ++ { ++ hlsl_block_cleanup(&clone); ++ loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; ++ continue; ++ } ++ ++ hlsl_block_cleanup(block); ++ hlsl_block_init(block); ++ hlsl_block_add_block(block, &clone); ++ } ++} ++ + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) + { +@@ -5532,6 +6012,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } + ++ transform_unroll_loops(ctx, body); + hlsl_run_const_passes(ctx, body); + + remove_unreachable_code(ctx, body); +@@ -5541,7 +6022,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + +- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, validate_dereferences, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); +@@ -5628,7 +6109,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + switch (target_type) + { + case VKD3D_SHADER_TARGET_D3D_BYTECODE: +- return hlsl_sm1_write(ctx, entry_func, out); ++ { ++ uint32_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vkd3d_shader_code ctab = {0}; ++ struct vsir_program program; ++ int 
result; ++ ++ sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); ++ if (ctx->result) ++ { ++ vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&ctab); ++ return ctx->result; ++ } ++ ++ result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); ++ vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&ctab); ++ return result; ++ } + + case VKD3D_SHADER_TARGET_DXBC_TPF: + return hlsl_sm4_write(ctx, entry_func, out); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index b3b745fc1b2..e5432cb35ce 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -46,9 +46,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade + + static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) +- || handler_idx == VKD3DSIH_HS_DECLS; ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) ++ || opcode == VKD3DSIH_HS_DECLS; + } + + static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +@@ -60,9 +60,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i + + static bool vsir_instruction_init_with_params(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, +- enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) ++ enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) + { +- vsir_instruction_init(ins, location, handler_idx); ++ vsir_instruction_init(ins, location, opcode); + ins->dst_count = dst_count; + ins->src_count = src_count; + +@@ -287,7 +287,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + mul_ins = &instructions->elements[pos]; + add_ins = &instructions->elements[pos + 1]; + +- mul_ins->handler_idx = VKD3DSIH_MUL; ++ mul_ins->opcode = VKD3DSIH_MUL; + mul_ins->src_count = 2; + + if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) +@@ -322,7 +322,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + { + struct vkd3d_shader_instruction *ins = &instructions->elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_IFC: + if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) +@@ -492,26 +492,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal + struct shader_phase_location *loc; + bool b; + +- if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) ++ if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) + { + b = flattener_is_in_fork_or_join_phase(normaliser); + /* Reset the phase info. */ + normaliser->phase_body_idx = ~0u; +- normaliser->phase = ins->handler_idx; ++ normaliser->phase = ins->opcode; + normaliser->instance_count = 1; + /* Leave the first occurrence and delete the rest. 
*/ + if (b) + vkd3d_shader_instruction_make_nop(ins); + return; + } +- else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT +- || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) ++ else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT ++ || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + { + normaliser->instance_count = ins->declaration.count + !ins->declaration.count; + vkd3d_shader_instruction_make_nop(ins); + return; + } +- else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( ++ else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + &ins->declaration.dst.reg)) + { + vkd3d_shader_instruction_make_nop(ins); +@@ -524,7 +524,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal + if (normaliser->phase_body_idx == ~0u) + normaliser->phase_body_idx = index; + +- if (ins->handler_idx == VKD3DSIH_RET) ++ if (ins->opcode == VKD3DSIH_RET) + { + normaliser->last_ret_location = ins->location; + vkd3d_shader_instruction_make_nop(ins); +@@ -679,11 +679,11 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 + } + + void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, +- enum vkd3d_shader_opcode handler_idx) ++ enum vkd3d_shader_opcode opcode) + { + memset(ins, 0, sizeof(*ins)); + ins->location = *location; +- ins->handler_idx = handler_idx; ++ ins->opcode = opcode; + } + + static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, +@@ -865,12 +865,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + { + ins = &instructions->elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- normaliser.phase = ins->handler_idx; ++ normaliser.phase = ins->opcode; + break; + default: + if (vsir_instruction_is_dcl(ins)) +@@ -888,7 +888,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + { + ins = &instructions->elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + input_control_point_count = ins->declaration.count; +@@ -1526,7 +1526,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + struct vkd3d_shader_register *reg; + unsigned int i; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_INPUT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) +@@ -1560,7 +1560,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- normaliser->phase = ins->handler_idx; ++ normaliser->phase = ins->opcode; + memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); + memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); + memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); +@@ -1594,7 +1594,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + { + ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + normaliser.output_control_point_count = ins->declaration.count; +@@ -1608,7 +1608,7 @@ static enum vkd3d_result 
vsir_program_normalise_io_registers(struct vsir_program + /* fall through */ + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- normaliser.phase = ins->handler_idx; ++ normaliser.phase = ins->opcode; + break; + default: + break; +@@ -1740,7 +1740,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) ++ if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) + { + struct flat_constant_def *def; + +@@ -1779,7 +1779,7 @@ static void remove_dead_code(struct vsir_program *program) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_IF: + case VKD3DSIH_LOOP: +@@ -1799,7 +1799,7 @@ static void remove_dead_code(struct vsir_program *program) + { + if (depth > 0) + { +- if (ins->handler_idx != VKD3DSIH_ELSE) ++ if (ins->opcode != VKD3DSIH_ELSE) + --depth; + vkd3d_shader_instruction_make_nop(ins); + } +@@ -1870,14 +1870,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_src_param *srcs; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_TEX: + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(srcs, 0, sizeof(*srcs) * 3); + +- ins->handler_idx = VKD3DSIH_SAMPLE; ++ ins->opcode = VKD3DSIH_SAMPLE; + + srcs[0] = ins->src[0]; + +@@ -1899,13 +1899,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr + ins->src_count = 3; + break; + ++ case VKD3DSIH_TEXLDD: ++ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ memset(srcs, 0, sizeof(*srcs) * 5); ++ ++ ins->opcode = VKD3DSIH_SAMPLE_GRAD; ++ ++ srcs[0] = ins->src[0]; ++ ++ srcs[1].reg.type = VKD3DSPR_RESOURCE; ++ srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; ++ srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; ++ srcs[1].reg.idx_count = 2; ++ srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; ++ srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ srcs[2].reg.type = VKD3DSPR_SAMPLER; ++ srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; ++ srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; ++ srcs[2].reg.idx_count = 2; ++ srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; ++ ++ srcs[3] = ins->src[2]; ++ srcs[4] = ins->src[3]; ++ ++ ins->src = srcs; ++ ins->src_count = 5; ++ break; ++ + case VKD3DSIH_TEXBEM: + case VKD3DSIH_TEXBEML: + case VKD3DSIH_TEXCOORD: + case VKD3DSIH_TEXDEPTH: + case VKD3DSIH_TEXDP3: + case VKD3DSIH_TEXDP3TEX: +- case VKD3DSIH_TEXLDD: + case VKD3DSIH_TEXLDL: + case VKD3DSIH_TEXM3x2PAD: + case VKD3DSIH_TEXM3x2TEX: +@@ -1919,7 +1948,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr + case VKD3DSIH_TEXREG2RGB: + vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " +- "Combined sampler instruction %#x.", ins->handler_idx); ++ "Combined sampler instruction %#x.", ins->opcode); + return VKD3D_ERROR_NOT_IMPLEMENTED; + + default: +@@ -2030,7 +2059,7 @@ static bool 
cf_flattener_copy_instruction(struct cf_flattener *flattener, + { + struct vkd3d_shader_instruction *dst_ins; + +- if (instruction->handler_idx == VKD3DSIH_NOP) ++ if (instruction->opcode == VKD3DSIH_NOP) + return true; + + if (!(dst_ins = cf_flattener_require_space(flattener, 1))) +@@ -2245,9 +2274,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + * phase instruction, and in all other shader types begins with the first label instruction. + * Declaring an indexable temp with function scope is not considered a declaration, + * because it needs to live inside a function. */ +- if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) ++ if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) + { +- bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP ++ bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP + && instruction->declaration.indexable_temp.has_function_scope; + + if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) +@@ -2260,14 +2289,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + cf_info = flattener->control_flow_depth + ? &flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + if (!cf_flattener_copy_instruction(flattener, instruction)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) ++ if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) + after_declarations_section = false; + break; + +@@ -2662,7 +2691,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + unsigned int case_count, j, default_label; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: + current_label = label_from_src_param(&ins->src[0]); +@@ -2858,7 +2887,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ + + /* Only phi src/dst SSA values need be converted here. Structurisation may + * introduce new cases of undominated SSA use, which will be handled later. 
*/ +- if (ins->handler_idx != VKD3DSIH_PHI) ++ if (ins->opcode != VKD3DSIH_PHI) + continue; + ++phi_count; + +@@ -2907,7 +2936,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: + current_label = label_from_src_param(&ins->src[0]); +@@ -3336,7 +3365,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) + if (block->label == 0) + continue; + +- switch (block->end->handler_idx) ++ switch (block->end->opcode) + { + case VKD3DSIH_RET: + shape = "trapezium"; +@@ -3478,7 +3507,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; + bool finish = false; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_PHI: + case VKD3DSIH_SWITCH_MONOLITHIC: +@@ -3533,7 +3562,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + if (block->label == 0) + continue; + +- switch (block->end->handler_idx) ++ switch (block->end->opcode) + { + case VKD3DSIH_RET: + break; +@@ -4192,7 +4221,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + structure->u.block = block; + + /* Generate between zero and two jump instructions. */ +- switch (block->end->handler_idx) ++ switch (block->end->opcode) + { + case VKD3DSIH_BRANCH: + { +@@ -5049,7 +5078,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: + assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); +@@ -5064,7 +5093,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); +- TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); ++ TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); + target.instructions[target.ins_count++] = *ins; + ++i; + if ((ret = vsir_program_structurize_function(program, message_context, +@@ -5222,7 +5251,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: + assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); +@@ -5237,7 +5266,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); +- TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); ++ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); + ++i; + if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( + program, message_context, &i)) < 0) +@@ -5641,7 +5670,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, + if (instruction->dst_count != count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, + "Invalid destination count %u for an instruction of type %#x, expected %u.", 
+- instruction->dst_count, instruction->handler_idx, count); ++ instruction->dst_count, instruction->opcode, count); + } + + static void vsir_validate_src_count(struct validation_context *ctx, +@@ -5650,7 +5679,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, + if (instruction->src_count != count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected %u.", +- instruction->src_count, instruction->handler_idx, count); ++ instruction->src_count, instruction->opcode, count); + } + + static bool vsir_validate_src_min_count(struct validation_context *ctx, +@@ -5660,7 +5689,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected at least %u.", +- instruction->src_count, instruction->handler_idx, count); ++ instruction->src_count, instruction->opcode, count); + return false; + } + +@@ -5674,7 +5703,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected at most %u.", +- instruction->src_count, instruction->handler_idx, count); ++ instruction->src_count, instruction->opcode, count); + return false; + } + +@@ -5701,7 +5730,7 @@ static void vsir_validate_cf_type(struct validation_context *ctx, + assert(expected_type != CF_TYPE_UNKNOWN); + if (ctx->cf_type != expected_type) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", +- instruction->handler_idx, name_from_cf_type(ctx->cf_type)); ++ instruction->opcode, name_from_cf_type(ctx->cf_type)); + } + + static void vsir_validate_instruction(struct validation_context *ctx) +@@ -5718,13 +5747,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) + for (i = 0; i < instruction->src_count; ++i) + vsir_validate_src_param(ctx, &instruction->src[i]); + +- if (instruction->handler_idx >= VKD3DSIH_INVALID) ++ if (instruction->opcode >= VKD3DSIH_INVALID) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", +- instruction->handler_idx); ++ instruction->opcode); + } + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: +@@ -5733,12 +5762,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_dst_count(ctx, instruction, 0); + vsir_validate_src_count(ctx, instruction, 0); + if (version->type != VKD3D_SHADER_TYPE_HULL) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", +- instruction->handler_idx); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, ++ "Phase instruction %#x is only valid in a hull shader.", ++ instruction->opcode); + if (ctx->depth != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", +- instruction->handler_idx); +- ctx->phase = instruction->handler_idx; ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Phase instruction %#x must appear to top level.", ++ instruction->opcode); ++ ctx->phase = instruction->opcode; + ctx->dcl_temps_found = false; + return; + +@@ -5812,7 +5843,7 @@ static void 
vsir_validate_instruction(struct validation_context *ctx) + && ctx->phase == VKD3DSIH_INVALID) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", +- instruction->handler_idx); ++ instruction->opcode); + + /* We support two different control flow types in shaders: + * block-based, like DXIL and SPIR-V, and structured, like D3DBC +@@ -5824,7 +5855,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + * block, but need for that hasn't arisen yet, so we don't. */ + if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) + { +- if (instruction->handler_idx == VKD3DSIH_LABEL) ++ if (instruction->opcode == VKD3DSIH_LABEL) + ctx->cf_type = CF_TYPE_BLOCKS; + else + ctx->cf_type = CF_TYPE_STRUCTURED; +@@ -5832,7 +5863,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + + if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) + { +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_LABEL: + if (ctx->inside_block) +@@ -5844,20 +5875,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) + case VKD3DSIH_BRANCH: + case VKD3DSIH_SWITCH_MONOLITHIC: + if (!ctx->inside_block) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", +- instruction->handler_idx); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Invalid instruction %#x outside any block.", ++ instruction->opcode); + ctx->inside_block = false; + break; + + default: + if (!ctx->inside_block) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", +- instruction->handler_idx); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Invalid instruction %#x outside any block.", ++ instruction->opcode); + break; + } + } + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_TEMPS: + vsir_validate_dst_count(ctx, instruction, 0); +@@ -5877,7 +5910,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, 1); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_IFC: +@@ -5896,7 +5929,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); + else +- ctx->blocks[ctx->depth - 1] = instruction->handler_idx; ++ ctx->blocks[ctx->depth - 1] = instruction->opcode; + break; + + case VKD3DSIH_ENDIF: +@@ -5915,7 +5948,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 
2 : 0); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_ENDLOOP: +@@ -5934,7 +5967,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, 1); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_ENDREP: +@@ -5953,7 +5986,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, 1); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_ENDSWITCH: +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index be50d3b9020..a3cdbe559a7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -20,6 +20,7 @@ + + %{ + ++#include "preproc.h" + #include "preproc.tab.h" + + #undef ERROR /* defined in wingdi.h */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 984a4f894f6..524fb8e9b1f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu + return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); + } + ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t op_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t index_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, index_id); ++} ++ + static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id) + { +@@ -6831,7 +6847,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + uint32_t function_id, void_id, function_type_id; + struct vkd3d_shader_phase *phase; + +- assert(compiler->phase != instruction->handler_idx); ++ assert(compiler->phase != instruction->opcode); + + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler); +@@ -6843,16 +6859,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_function(builder, void_id, function_id, + SpvFunctionControlMaskNone, function_type_id); + +- compiler->phase = instruction->handler_idx; ++ 
compiler->phase = instruction->opcode; + spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); + +- phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ phase = (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) + ? &compiler->control_point_phase : &compiler->patch_constant_phase; + phase->function_id = function_id; + /* The insertion location must be set after the label is emitted. */ + phase->function_location = 0; + +- if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) + compiler->emit_default_control_point_phase = instruction->flags; + } + +@@ -7016,7 +7032,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + { + static const struct + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + SpvOp spirv_op; + } + alu_ops[] = +@@ -7056,7 +7072,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + + for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) + { +- if (alu_ops[i].handler_idx == instruction->handler_idx) ++ if (alu_ops[i].opcode == instruction->opcode) + return alu_ops[i].spirv_op; + } + +@@ -7065,7 +7081,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + + static SpvOp spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) + { +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_AND: + return SpvOpLogicalAnd; +@@ -7090,20 +7106,20 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) + { +- val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); ++ val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); + } + else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) + { + /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ +- val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); ++ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); + } + else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) + { +- val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); ++ val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); + } + else if (dst->reg.data_type == VKD3D_DATA_UINT64) + { +- val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); ++ val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); + } + else + { +@@ -7126,7 +7142,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + SpvOp op = SpvOpMax; + unsigned int i; + +- if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) ++ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ + FIXME("Unsupported 64-bit source for bit count.\n"); +@@ -7142,8 +7158,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + /* VSIR supports logic ops AND/OR/XOR on bool values. */ + op = spirv_compiler_map_logical_instruction(instruction); + } +- else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF +- || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) ++ else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF ++ || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) + { + /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, + * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. */ +@@ -7158,9 +7174,9 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + + if (op == SpvOpMax) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, +- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); ++ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); + return VKD3D_ERROR_INVALID_SHADER; + } + +@@ -7179,8 +7195,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + * Microsoft fxc will compile immediate constants larger than 5 bits. + * Fixing up the constants would be more elegant, but the simplest way is + * to let this handle constants too. */ +- if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL +- || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) ++ if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL ++ || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) + { + uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, + VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); +@@ -7218,7 +7234,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + { + static const struct + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + enum GLSLstd450 glsl_inst; + } + glsl_insts[] = +@@ -7258,7 +7274,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + + for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) + { +- if (glsl_insts[i].handler_idx == instruction->handler_idx) ++ if (glsl_insts[i].opcode == instruction->opcode) + return glsl_insts[i].glsl_inst; + } + +@@ -7276,20 +7292,20 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp + unsigned int i, component_count; + enum GLSLstd450 glsl_inst; + +- if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI +- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) ++ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI ++ || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ +- FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); ++ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, +- "64-bit source for handler %#x is not supported.", instruction->handler_idx); ++ "64-bit source for handler %#x is not supported.", instruction->opcode); + return; + } + + glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); + if (glsl_inst == GLSLstd450Bad) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -7306,8 +7322,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp + val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, + instr_set_id, glsl_inst, src_id, instruction->src_count); + +- if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI +- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) ++ if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI ++ || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) + { + /* In D3D bits are numbered from the most significant bit. */ + component_count = vsir_write_mask_component_count(dst->write_mask); +@@ -7415,7 +7431,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, + + if (src[0].reg.data_type != VKD3D_DATA_BOOL) + { +- if (instruction->handler_idx == VKD3DSIH_CMP) ++ if (instruction->opcode == VKD3DSIH_CMP) + condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, + vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, + spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); +@@ -7469,9 +7485,9 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, + component_count = vsir_write_mask_component_count(dst->write_mask); + component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); + +- if (instruction->handler_idx == VKD3DSIH_DP4) ++ if (instruction->opcode == VKD3DSIH_DP4) + write_mask = VKD3DSP_WRITEMASK_ALL; +- else if (instruction->handler_idx == VKD3DSIH_DP3) ++ else if (instruction->opcode == VKD3DSIH_DP3) + write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; + else + write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; +@@ -7606,8 +7622,8 @@ static void spirv_compiler_emit_int_div(struct spirv_compiler *compiler, + unsigned int component_count = 0; + SpvOp div_op, mod_op; + +- div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; +- mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; ++ div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; ++ mod_op = instruction->opcode == VKD3DSIH_IDIV ? 
SpvOpSRem : SpvOpUMod; + + if (dst[0].reg.type != VKD3DSPR_NULL) + { +@@ -7778,13 +7794,13 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp + mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); + size_id = spirv_compiler_get_constant_uint(compiler, size); + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; + case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; + case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; + default: +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -7895,7 +7911,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + unsigned int component_count; + SpvOp op; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DEQO: + case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; +@@ -7916,7 +7932,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; + case VKD3DSIH_ULT: op = SpvOpULessThan; break; + default: +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -7949,7 +7965,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c + src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); + src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); + val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); +- if (instruction->handler_idx == VKD3DSIH_ORD) ++ if (instruction->opcode == VKD3DSIH_ORD) + val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } +@@ -7964,7 +7980,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil + unsigned int component_count; + SpvOp op; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; + case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; +@@ -8262,7 +8278,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile + + static const struct instruction_info + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + SpvOp op; + bool needs_derivative_control; + } +@@ -8279,7 +8295,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile + info = NULL; + for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) + { +- if (deriv_instructions[i].handler_idx == instruction->handler_idx) ++ if (deriv_instructions[i].opcode == instruction->opcode) + { + info = &deriv_instructions[i]; + break; +@@ -8287,7 +8303,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile + } + if (!info) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -8497,7 +8513,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, + uint32_t coordinate_mask; + bool multisample; + +- multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; ++ multisample = instruction->opcode == VKD3DSIH_LD2DMS; + + spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); + +@@ -8576,7 +8592,7 @@ static void 
spirv_compiler_emit_sample(struct spirv_compiler *compiler, + spirv_compiler_prepare_image(compiler, &image, + &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_SAMPLE: + op = SpvOpImageSampleImplicitLod; +@@ -8603,7 +8619,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, + &src[3], VKD3DSP_WRITEMASK_0); + break; + default: +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -8637,7 +8653,7 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, + uint32_t image_operands[2]; + SpvOp op; + +- if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) ++ if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) + { + op = SpvOpImageSampleDrefExplicitLod; + operands_mask |= SpvImageOperandsLodMask; +@@ -8687,12 +8703,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, + uint32_t coordinate_mask; + bool extended_offset; + +- if (instruction->handler_idx == VKD3DSIH_GATHER4_C +- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) ++ if (instruction->opcode == VKD3DSIH_GATHER4_C ++ || instruction->opcode == VKD3DSIH_GATHER4_PO_C) + image_flags |= VKD3D_IMAGE_FLAG_DEPTH; + +- extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO +- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; ++ extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO ++ || instruction->opcode == VKD3DSIH_GATHER4_PO_C; + + addr = &src[0]; + offset = extended_offset ? &src[1] : NULL; +@@ -8963,7 +8979,6 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * + { + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); +- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +@@ -9007,7 +9022,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); +- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +@@ -9145,7 +9159,7 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c + uint32_t operands[3]; + SpvOp op; + +- op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC ++ op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC + ? 
SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; + + resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); +@@ -9211,7 +9225,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins + { + static const struct + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + SpvOp spirv_op; + } + atomic_ops[] = +@@ -9240,16 +9254,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins + + for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) + { +- if (atomic_ops[i].handler_idx == instruction->handler_idx) ++ if (atomic_ops[i].opcode == instruction->opcode) + return atomic_ops[i].spirv_op; + } + + return SpvOpMax; + } + +-static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) ++static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) + { +- return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; ++ return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; + } + + static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, +@@ -9274,12 +9288,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + bool raw; + SpvOp op; + +- resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; ++ resource = is_imm_atomic_instruction(instruction->opcode) ? &dst[1] : &dst[0]; + + op = spirv_compiler_map_atomic_instruction(instruction); + if (op == SpvOpMax) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -9360,7 +9374,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + { + WARN("Ignoring 'volatile' attribute.\n"); + spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, +- "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); ++ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); + } + + memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) +@@ -9379,7 +9393,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, + op, type_id, operands, i); + +- if (is_imm_atomic_instruction(instruction->handler_idx)) ++ if (is_imm_atomic_instruction(instruction->opcode)) + spirv_compiler_emit_store_dst(compiler, dst, result_id); + } + +@@ -9684,13 +9698,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, + + src_ids[src_count++] = register_info.id; + +- if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) ++ if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) + { + op = GLSLstd450InterpolateAtCentroid; + } + else + { +- assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); ++ assert(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); + op = GLSLstd450InterpolateAtSample; + src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); + } +@@ -9772,7 +9786,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int stream_idx; + +- if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) ++ if (instruction->opcode == VKD3DSIH_EMIT_STREAM) + stream_idx = instruction->src[0].reg.idx[0].offset; + else + stream_idx = 0; +@@ -9793,7 +9807,7 @@ static void 
spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int stream_idx; + +- if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) ++ if (instruction->opcode == VKD3DSIH_CUT_STREAM) + stream_idx = instruction->src[0].reg.idx[0].offset; + else + stream_idx = 0; +@@ -9807,9 +9821,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_end_primitive(builder); + } + +-static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) ++static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) + { +- switch (handler_idx) ++ switch (opcode) ++ { ++ case VKD3DSIH_QUAD_READ_ACROSS_X: ++ return 0; ++ case VKD3DSIH_QUAD_READ_ACROSS_Y: ++ return 1; ++ case VKD3DSIH_QUAD_READ_ACROSS_D: ++ return 2; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, direction_type_id, direction_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); ++ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ direction_id = map_quad_read_across_direction(instruction->opcode); ++ direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id, lane_id; ++ ++ if (!register_is_constant_or_undef(&src[1].reg)) ++ { ++ FIXME("Unsupported non-constant quad read lane index.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "Non-constant quad read lane indices are not supported."); ++ return; ++ } ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) ++{ ++ switch (opcode) + { + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + return SpvOpGroupNonUniformAllEqual; +@@ -9833,7 +9906,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, + + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); + +- op = map_wave_bool_op(instruction->handler_idx); ++ op = map_wave_bool_op(instruction->opcode); + type_id = vkd3d_spirv_get_op_type_bool(builder); + 
val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, +@@ -9865,9 +9938,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +-static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) ++static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) + { +- switch (handler_idx) ++ switch (opcode) + { + case VKD3DSIH_WAVE_ACTIVE_BIT_AND: + return SpvOpGroupNonUniformBitwiseAnd; +@@ -9905,7 +9978,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, + uint32_t type_id, val_id; + SpvOp op; + +- op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); ++ op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); +@@ -9928,7 +10001,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, + SpvGroupOperation group_op; + uint32_t type_id, val_id; + +- group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan ++ group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan + : SpvGroupOperationReduce; + + val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); +@@ -10014,7 +10087,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + + compiler->location = instruction->location; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_GLOBAL_FLAGS: + spirv_compiler_emit_dcl_global_flags(compiler, instruction); +@@ -10337,6 +10410,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CUT_STREAM: + spirv_compiler_emit_cut_stream(compiler, instruction); + break; ++ case VKD3DSIH_QUAD_READ_ACROSS_D: ++ case VKD3DSIH_QUAD_READ_ACROSS_X: ++ case VKD3DSIH_QUAD_READ_ACROSS_Y: ++ spirv_compiler_emit_quad_read_across(compiler, instruction); ++ break; ++ case VKD3DSIH_QUAD_READ_LANE_AT: ++ spirv_compiler_emit_quad_read_lane_at(compiler, instruction); ++ break; + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + case VKD3DSIH_WAVE_ALL_TRUE: + case VKD3DSIH_WAVE_ANY_TRUE: +@@ -10390,9 +10471,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + /* nothing to do */ + break; + default: +- FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); ++ FIXME("Unhandled instruction %#x.\n", instruction->opcode); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, +- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); ++ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); + break; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index b562e815a81..a7c37215e5e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -780,7 +780,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + return; + } + +@@ -789,7 +789,7 @@ 
static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + if (icb_size % 4) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + +@@ -797,7 +797,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + { + ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + icb->register_idx = 0; +@@ -2395,16 +2395,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) + { + FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + *ptr += len; + return; + } + + vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); +- if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE +- || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) +- sm4->phase = ins->handler_idx; +- sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++ if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE ++ || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) ++ sm4->phase = ins->opcode; ++ sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; + ins->flags = 0; + ins->coissue = false; + ins->raw = false; +@@ -2417,7 +2417,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + { + ERR("Failed to allocate src parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; +@@ -2459,7 +2459,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + { + ERR("Failed to allocate dst parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + for (i = 0; i < ins->dst_count; ++i) +@@ -2467,7 +2467,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), + &dst_params[i]))) + { +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + dst_params[i].modifiers |= instruction_dst_modifier; +@@ -2478,7 +2478,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), + &src_params[i]))) + { +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + } +@@ -2488,7 +2488,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + + fail: + *ptr = sm4->end; +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + +@@ -2693,7 +2693,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + ins = &instructions->elements[instructions->count]; + shader_sm4_read_instruction(&sm4, ins); 
+ +- if (ins->handler_idx == VKD3DSIH_INVALID) ++ if (ins->opcode == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + vsir_program_cleanup(program); +@@ -2762,6 +2762,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, ++ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) +@@ -2817,6 +2818,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, ++ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, +@@ -2984,11 +2986,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + return D3D_SVC_VECTOR; + + case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: +@@ -2997,6 +3001,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: + break; + } + vkd3d_unreachable(); +@@ -3178,13 +3183,14 @@ struct extern_resource + /* var is only not NULL if this resource is a whole variable, so it may be responsible for more + * than one component. 
*/ + const struct hlsl_ir_var *var; ++ const struct hlsl_buffer *buffer; + + char *name; + struct hlsl_type *data_type; + bool is_user_packed; + + enum hlsl_regset regset; +- unsigned int id, bind_count; ++ unsigned int id, space, index, bind_count; + }; + + static int sm4_compare_extern_resources(const void *a, const void *b) +@@ -3196,7 +3202,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) + if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) + return r; + +- return vkd3d_u32_compare(aa->id, bb->id); ++ if ((r = vkd3d_u32_compare(aa->space, bb->space))) ++ return r; ++ ++ return vkd3d_u32_compare(aa->index, bb->index); + } + + static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +@@ -3220,6 +3229,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; + struct extern_resource *extern_resources = NULL; + const struct hlsl_ir_var *var; ++ struct hlsl_buffer *buffer; + enum hlsl_regset regset; + size_t capacity = 0; + char *name; +@@ -3272,13 +3282,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + hlsl_release_string_buffer(ctx, name_buffer); + + extern_resources[*count].var = NULL; ++ extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = regset; +- extern_resources[*count].id = var->regs[regset].id + regset_offset; ++ extern_resources[*count].id = var->regs[regset].id; ++ extern_resources[*count].space = var->regs[regset].space; ++ extern_resources[*count].index = var->regs[regset].index + regset_offset; + extern_resources[*count].bind_count = 1; + + ++*count; +@@ -3313,13 +3326,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + } + + extern_resources[*count].var = var; ++ extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type; +- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; ++ /* For some reason 5.1 resources aren't marked as ++ * user-packed, but cbuffers still are. 
*/ ++ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) ++ && !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = r; + extern_resources[*count].id = var->regs[r].id; ++ extern_resources[*count].space = var->regs[r].space; ++ extern_resources[*count].index = var->regs[r].index; + extern_resources[*count].bind_count = var->bind_count[r]; + + ++*count; +@@ -3327,14 +3346,51 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + } + } + ++ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (!buffer->reg.allocated) ++ continue; ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, ++ sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ if (!(name = hlsl_strdup(ctx, buffer->name))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ extern_resources[*count].var = NULL; ++ extern_resources[*count].buffer = buffer; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].data_type = NULL; ++ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; ++ ++ extern_resources[*count].regset = HLSL_REGSET_NUMERIC; ++ extern_resources[*count].id = buffer->reg.id; ++ extern_resources[*count].space = buffer->reg.space; ++ extern_resources[*count].index = buffer->reg.index; ++ extern_resources[*count].bind_count = 1; ++ ++ ++*count; ++ } ++ + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + return extern_resources; + } + + static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + { +- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; ++ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; ++ unsigned int cbuffer_count = 0, extern_resources_count, i, j; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; + struct vkd3d_bytecode_buffer buffer = {0}; +@@ -3354,19 +3410,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + +- resource_count += extern_resources_count; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- { + ++cbuffer_count; +- ++resource_count; +- } + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); +- put_u32(&buffer, resource_count); ++ put_u32(&buffer, extern_resources_count); + resource_position = put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); +@@ -3378,7 +3430,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ +- put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ ++ put_u32(&buffer, binding_desc_size); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ +@@ -3395,21 +3447,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + +- if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); +- + if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, sm4_resource_type(resource->data_type)); +- if (resource->regset == HLSL_REGSET_SAMPLERS) +- { +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- } ++ if (resource->buffer) ++ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + else ++ put_u32(&buffer, sm4_resource_type(resource->data_type)); ++ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) + { + unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; + +@@ -3418,32 +3464,21 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } +- put_u32(&buffer, resource->id); ++ else ++ { ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ } ++ put_u32(&buffer, resource->index); + put_u32(&buffer, resource->bind_count); + put_u32(&buffer, flags); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- uint32_t flags = 0; +- +- if (!cbuffer->reg.allocated) +- continue; + + if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); +- +- if (cbuffer->reservation.reg_type) +- flags |= D3D_SIF_USERPACKED; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); +- put_u32(&buffer, 0); /* return type */ +- put_u32(&buffer, 0); /* dimension */ +- put_u32(&buffer, 0); /* multisample count */ +- put_u32(&buffer, cbuffer->reg.id); /* bind point */ +- put_u32(&buffer, 1); /* bind count */ +- put_u32(&buffer, flags); /* flags */ ++ { ++ put_u32(&buffer, resource->space); ++ put_u32(&buffer, resource->id); ++ } + } + + for (i = 0; i < extern_resources_count; ++i) +@@ -3451,16 +3486,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + const struct extern_resource *resource = &extern_resources[i]; + + string_offset = put_string(&buffer, resource->name); +- set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (!cbuffer->reg.allocated) +- continue; +- +- string_offset = put_string(&buffer, cbuffer->name); +- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); ++ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); + } + + /* Buffers. 
*/ +@@ -3522,7 +3548,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ +- put_u32(&buffer, 0); /* FIXME: default value */ ++ put_u32(&buffer, 0); /* default value */ + + if (profile->major_version >= 5) + { +@@ -3546,6 +3572,34 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); ++ ++ if (var->default_values) ++ { ++ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int comp_count = hlsl_type_component_count(var->data_type); ++ unsigned int default_value_offset; ++ unsigned int k; ++ ++ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); ++ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); ++ ++ for (k = 0; k < comp_count; ++k) ++ { ++ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ unsigned int comp_offset; ++ enum hlsl_regset regset; ++ ++ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) ++ hlsl_fixme(ctx, &var->loc, "Write double default values."); ++ ++ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), ++ var->default_values[k].value.u); ++ } ++ } ++ } + ++j; + } + } +@@ -3720,30 +3774,57 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } + assert(regset == HLSL_REGSET_TEXTURES); +- reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } + assert(regset == HLSL_REGSET_UAVS); +- reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; +- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[1].offset = 
var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } + assert(regset == HLSL_REGSET_SAMPLERS); +- reg->idx_count = 1; + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else +@@ -3753,9 +3834,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + assert(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = var->buffer->reg.id; +- reg->idx[1].offset = offset / 4; +- reg->idx_count = 2; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ ++ reg->idx[2].offset = offset / 4; ++ reg->idx_count = 3; ++ } ++ else ++ { ++ reg->idx[0].offset = var->buffer->reg.index; ++ reg->idx[1].offset = offset / 4; ++ reg->idx_count = 2; ++ } + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } + } +@@ -4139,18 +4230,36 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + + static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) + { +- const struct sm4_instruction instr = ++ size_t size = (cbuffer->used_size + 3) / 4; ++ ++ struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, + .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, +- .srcs[0].reg.idx[0].offset = cbuffer->reg.id, +- .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, +- .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, + .src_count = 1, + }; ++ ++ if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ { ++ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; ++ instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; ++ instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ ++ instr.srcs[0].reg.idx_count = 3; ++ ++ instr.idx[0] = size; ++ instr.idx[1] = cbuffer->reg.space; ++ instr.idx_count = 2; ++ } ++ else ++ { ++ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; ++ instr.srcs[0].reg.idx[1].offset = size; ++ instr.srcs[0].reg.idx_count = 2; ++ } ++ + write_sm4_instruction(tpf, &instr); + } + +@@ -4163,7 +4272,6 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3DSPR_SAMPLER, +- .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + +@@ -4179,7 +4287,22 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + +- instr.dsts[0].reg.idx[0].offset = resource->id + i; ++ if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ { ++ assert(!i); ++ instr.dsts[0].reg.idx[0].offset = resource->id; ++ instr.dsts[0].reg.idx[1].offset = resource->index; ++ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ ++ instr.dsts[0].reg.idx_count = 3; ++ ++ instr.idx[0] = resource->space; ++ instr.idx_count = 1; ++ } ++ else ++ { ++ instr.dsts[0].reg.idx[0].offset = resource->index + i; ++ instr.dsts[0].reg.idx_count = 1; ++ } + write_sm4_instruction(tpf, &instr); + } + } +@@ -4212,6 +4335,23 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + .idx_count = 1, + }; + ++ if (hlsl_version_ge(tpf->ctx, 
5, 1)) ++ { ++ assert(!i); ++ instr.dsts[0].reg.idx[0].offset = resource->id; ++ instr.dsts[0].reg.idx[1].offset = resource->index; ++ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ ++ instr.dsts[0].reg.idx_count = 3; ++ ++ instr.idx[1] = resource->space; ++ instr.idx_count = 2; ++ } ++ else ++ { ++ instr.dsts[0].reg.idx[0].offset = resource->index + i; ++ instr.dsts[0].reg.idx_count = 1; ++ } ++ + if (uav) + { + switch (resource->data_type->sampler_dim) +@@ -4904,6 +5044,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + ++static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) ++{ ++ struct sm4_instruction instr; ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; ++ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; ++ instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ + static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + const struct hlsl_ir_node *arg1 = expr->operands[0].node; +@@ -4919,6 +5078,14 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + + switch (expr->op) + { ++ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: ++ if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) ++ write_sm4_rasterizer_sample_count(tpf, &expr->node); ++ else ++ hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); ++ break; ++ + case HLSL_OP1_ABS: + switch (dst_type->e.numeric.type) + { +@@ -5799,21 +5966,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- { +- if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); +- + write_sm4_dcl_constant_buffer(&tpf, cbuffer); +- } + } + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + +- if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); +- + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +@@ -5875,7 +6034,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { +- if (extern_resources[i].data_type->e.resource.rasteriser_ordered) ++ if (extern_resources[i].data_type && extern_resources[i].data_type->e.resource.rasteriser_ordered) + *flags |= VKD3D_SM4_REQUIRES_ROVS; + } + sm4_free_extern_resources(extern_resources, extern_resources_count); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 14a3fa778e5..fdbde019111 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -23,6 
+23,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex + + static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) +- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) +- || handler_idx == VKD3DSIH_LD_UAV_TYPED +- || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) +- || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ ++ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) ++ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) ++ || opcode == VKD3DSIH_LD_UAV_TYPED ++ || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) ++ || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); + } + + static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, +@@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * + + static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC +- || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ ++ return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; + } + + static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, +@@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex + + static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) +- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ ++ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) ++ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); + } + + static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, +@@ -1130,7 +1134,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + + context->location = instruction->location; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_CONSTANT_BUFFER: + vkd3d_shader_scan_constant_buffer_declaration(context, instruction); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 29b8d6ad022..96e613669a6 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -151,6 +151,8 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, + 
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, ++ VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, ++ VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -455,6 +457,10 @@ enum vkd3d_shader_opcode + VKD3DSIH_PHASE, + VKD3DSIH_PHI, + VKD3DSIH_POW, ++ VKD3DSIH_QUAD_READ_ACROSS_D, ++ VKD3DSIH_QUAD_READ_ACROSS_X, ++ VKD3DSIH_QUAD_READ_ACROSS_Y, ++ VKD3DSIH_QUAD_READ_LANE_AT, + VKD3DSIH_RCP, + VKD3DSIH_REP, + VKD3DSIH_RESINFO, +@@ -805,6 +811,7 @@ enum vkd3d_tessellator_domain + + #define VKD3DSI_NONE 0x0 + #define VKD3DSI_TEXLD_PROJECT 0x1 ++#define VKD3DSI_TEXLD_BIAS 0x2 + #define VKD3DSI_INDEXED_DYNAMIC 0x4 + #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 + #define VKD3DSI_RESINFO_UINT 0x2 +@@ -1189,7 +1196,7 @@ struct vkd3d_shader_location + struct vkd3d_shader_instruction + { + struct vkd3d_shader_location location; +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + uint32_t flags; + unsigned int dst_count; + unsigned int src_count; +@@ -1238,8 +1245,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns + return v->major < major || (v->major == major && v->minor <= minor); + } + +-void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, +- enum vkd3d_shader_opcode handler_idx); ++void vsir_instruction_init(struct vkd3d_shader_instruction *ins, ++ const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); + + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) + { +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 95366d3441b..2354938c08d 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -2025,7 +2025,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l + + static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, + const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, +- VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) ++ VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout, ++ struct d3d12_device *device) + { + bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); + VkPipelineStageFlags queue_shader_stages = 0; +@@ -2033,10 +2034,12 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) + { + queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT +- | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT +- | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT +- | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT + | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; ++ if (device->vk_info.geometry_shaders) ++ queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; ++ if (device->vk_info.tessellation_shaders) ++ queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT ++ | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + } + if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) + queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; +@@ -2054,7 +2057,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + { + if (resource->present_state != 
D3D12_RESOURCE_STATE_PRESENT) + return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, +- resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); ++ resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); + + *access_mask = VK_ACCESS_MEMORY_READ_BIT; + *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; +@@ -2251,7 +2254,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 + VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; + + if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, +- resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) ++ resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, ++ &dst_stage_mask, &barrier.newLayout, list->device)) + { + FIXME("Unhandled state %#x.\n", resource->initial_state); + return; +@@ -4277,13 +4281,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + } + + if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, +- resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) ++ resource, list->vk_queue_flags, vk_info, &src_access_mask, ++ &src_stage_mask, &layout_before, list->device)) + { + FIXME("Unhandled state %#x.\n", state_before); + continue; + } + if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, +- resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) ++ resource, list->vk_queue_flags, vk_info, &dst_access_mask, ++ &dst_stage_mask, &layout_after, list->device)) + { + FIXME("Unhandled state %#x.\n", state_after); + continue; +@@ -4303,7 +4309,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + + resource = unsafe_impl_from_ID3D12Resource(uav->pResource); + vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, +- resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); ++ resource, list->vk_queue_flags, vk_info, &access_mask, ++ &stage_mask, &image_layout, list->device); + src_access_mask = dst_access_mask = access_mask; + src_stage_mask = dst_stage_mask = stage_mask; + layout_before = layout_after = image_layout; +@@ -4814,15 +4821,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + return; + } + +- if (!views) +- { +- WARN("NULL \"views\" pointer specified.\n"); +- return; +- } +- + for (i = 0; i < view_count; ++i) + { +- if (views[i].BufferLocation) ++ if (views && views[i].BufferLocation) + { + resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); + buffers[i] = resource->u.vk_buffer; +@@ -5434,6 +5435,52 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 + } + } + ++static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, ++ struct d3d12_resource *resource, VkClearColorValue *colour) ++{ ++ struct vkd3d_texture_view_desc view_desc; ++ const struct vkd3d_format *uint_format; ++ struct vkd3d_view *uint_view; ++ ++ if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) ++ && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) ++ { ++ ERR("Unhandled format %#x.\n", view->format->dxgi_format); ++ return NULL; ++ } ++ ++ if 
(d3d12_resource_is_buffer(resource)) ++ { ++ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, ++ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) ++ { ++ ERR("Failed to create buffer view.\n"); ++ return NULL; ++ } ++ ++ return uint_view; ++ } ++ ++ memset(&view_desc, 0, sizeof(view_desc)); ++ view_desc.view_type = view->info.texture.vk_view_type; ++ view_desc.format = uint_format; ++ view_desc.miplevel_idx = view->info.texture.miplevel_idx; ++ view_desc.miplevel_count = 1; ++ view_desc.layer_idx = view->info.texture.layer_idx; ++ view_desc.layer_count = view->info.texture.layer_count; ++ view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; ++ view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; ++ ++ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, ++ resource->u.vk_image, &view_desc, &uint_view)) ++ { ++ ERR("Failed to create image view.\n"); ++ return NULL; ++ } ++ ++ return uint_view; ++} ++ + static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const UINT values[4], UINT rect_count, const D3D12_RECT *rects) +@@ -5441,8 +5488,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; + struct d3d12_device *device = list->device; +- struct vkd3d_texture_view_desc view_desc; +- const struct vkd3d_format *uint_format; + const struct vkd3d_resource_view *view; + struct d3d12_resource *resource_impl; + VkClearColorValue colour; +@@ -5456,44 +5501,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + view = &descriptor->v; + memcpy(colour.uint32, values, sizeof(colour.uint32)); + +- if (view->format->type != VKD3D_FORMAT_TYPE_UINT) ++ if (view->format->type != VKD3D_FORMAT_TYPE_UINT ++ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) + { +- if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) +- && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) +- { +- ERR("Unhandled format %#x.\n", view->format->dxgi_format); +- return; +- } +- +- if (d3d12_resource_is_buffer(resource_impl)) +- { +- if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, +- uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) +- { +- ERR("Failed to create buffer view.\n"); +- return; +- } +- } +- else +- { +- memset(&view_desc, 0, sizeof(view_desc)); +- view_desc.view_type = view->info.texture.vk_view_type; +- view_desc.format = uint_format; +- view_desc.miplevel_idx = view->info.texture.miplevel_idx; +- view_desc.miplevel_count = 1; +- view_desc.layer_idx = view->info.texture.layer_idx; +- view_desc.layer_count = view->info.texture.layer_count; +- view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; +- view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; +- +- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, +- &uint_view)) +- { +- ERR("Failed to create image view.\n"); +- return; +- } +- } +- descriptor = uint_view; ++ ERR("Failed to create UINT view.\n"); ++ return; + } + + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); 
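The hunk above and the one that follows make ClearUnorderedAccessViewUint and ClearUnorderedAccessViewFloat share the new create_uint_view() helper: when a descriptor's format cannot be cleared directly, a temporary UINT view is created, used for the clear, and then released. A minimal sketch of that shared call-site pattern is shown below purely for illustration; it is not part of the patch, the wrapper function and its needs_uint_view flag are invented for exposition, and only the helpers visible in the surrounding hunks are assumed.

```c
/* Illustrative sketch only -- not part of the patch. The wrapper name and the
 * needs_uint_view parameter are hypothetical; create_uint_view(),
 * d3d12_command_list_clear_uav() and vkd3d_view_decref() are the functions
 * used by the surrounding hunks. */
static void clear_uav_with_uint_fallback(struct d3d12_command_list *list,
        struct d3d12_resource *resource, struct vkd3d_view *descriptor,
        VkClearColorValue *colour, UINT rect_count, const D3D12_RECT *rects,
        bool needs_uint_view)
{
    struct d3d12_device *device = list->device;
    struct vkd3d_view *uint_view = NULL;

    /* When the descriptor's format cannot be cleared directly (a non-UINT
     * format in the Uint path, an SINT format in the Float path), create a
     * temporary UINT view of the same resource range. */
    if (needs_uint_view
            && !(descriptor = uint_view = create_uint_view(device, &descriptor->v, resource, colour)))
    {
        ERR("Failed to create UINT view.\n");
        return;
    }

    d3d12_command_list_clear_uav(list, resource, descriptor, colour, rect_count, rects);

    /* The temporary view is owned by this call site and must be released. */
    if (uint_view)
        vkd3d_view_decref(uint_view, device);
}
```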
+@@ -5507,19 +5519,32 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I + const float values[4], UINT rect_count, const D3D12_RECT *rects) + { + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct vkd3d_view *descriptor, *uint_view = NULL; ++ struct d3d12_device *device = list->device; ++ const struct vkd3d_resource_view *view; + struct d3d12_resource *resource_impl; + VkClearColorValue colour; +- struct vkd3d_view *view; + + TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", + iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); + + resource_impl = unsafe_impl_from_ID3D12Resource(resource); +- if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) ++ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; ++ view = &descriptor->v; + memcpy(colour.float32, values, sizeof(colour.float32)); + +- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); ++ if (view->format->type == VKD3D_FORMAT_TYPE_SINT ++ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) ++ { ++ ERR("Failed to create UINT view.\n"); ++ return; ++ } ++ ++ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); ++ ++ if (uint_view) ++ vkd3d_view_decref(uint_view, device); + } + + static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index cfc9c5f5ed3..2bbc170504e 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = + VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, + }; + ++/* In general we don't want to enable Vulkan beta extensions, but make an ++ * exception for VK_KHR_portability_subset because we draw no real feature from ++ * it, but it's still useful to be able to develop for MoltenVK without being ++ * spammed with validation errors. 
*/ ++#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME ++#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" ++#endif ++ + static const struct vkd3d_optional_extension_info optional_device_extensions[] = + { + /* KHR extensions */ +@@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), + VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), + VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), ++ VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), + VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), + VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), + VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), +@@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), + VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), + VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), +- VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), ++ VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), + VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), + VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), +@@ -1634,6 +1643,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + vulkan_info->device_limits = physical_device_info->properties2.properties.limits; + vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; ++ vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; ++ vulkan_info->tessellation_shaders = physical_device_info->features2.features.tessellationShader; + vulkan_info->sparse_binding = features->sparseBinding; + vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; + vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; +@@ -3806,7 +3817,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + return E_INVALIDARG; + } + +- data->UnalignedBlockTexturesSupported = FALSE; ++ /* Vulkan does not restrict block texture alignment. 
*/ ++ data->UnalignedBlockTexturesSupported = TRUE; + + TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); + return S_OK; +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index c897d9f2c5a..7d7f40c0953 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d + return false; + } + +- if (align(desc->Width, format->block_width) != desc->Width +- || align(desc->Height, format->block_height) != desc->Height) +- { +- WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", +- desc->Width, desc->Height, desc->Format); +- return false; +- } +- + return true; + } + +@@ -4357,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript + return hr; + + descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); +- d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); ++ if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) ++ { ++ vkd3d_private_store_destroy(&descriptor_heap->private_store); ++ return hr; ++ } + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + d3d12_device_add_ref(descriptor_heap->device = device); +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index d1fa866d9e3..7acd39d65be 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -123,6 +123,7 @@ struct vkd3d_vulkan_info + bool KHR_image_format_list; + bool KHR_maintenance2; + bool KHR_maintenance3; ++ bool KHR_portability_subset; + bool KHR_push_descriptor; + bool KHR_sampler_mirror_clamp_to_edge; + bool KHR_timeline_semaphore; +@@ -145,6 +146,8 @@ struct vkd3d_vulkan_info + + bool rasterization_stream; + bool transform_feedback_queries; ++ bool geometry_shaders; ++ bool tessellation_shaders; + + bool uav_read_without_format; + +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch deleted file mode 100644 index 9f37d446..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch +++ /dev/null @@ -1,3888 +0,0 @@ -From bcb85270b8635f3d35b7d559c1800597589c62d1 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 7 Mar 2024 10:40:41 +1100 -Subject: [PATCH] Updated vkd3d to d3ba810c98b4d2df260a527f74586a0b31408510. 
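The resource.c hunk in the new patch above also tightens d3d12_descriptor_heap_init(): the return value of d3d12_descriptor_heap_vk_descriptor_sets_init() is no longer ignored, and on failure the private-data store set up earlier in the function is torn down before the HRESULT is propagated. A condensed sketch of the resulting flow follows; the vkd3d_private_store_init() call is assumed from the untouched part of the function, and unrelated setup is omitted.

/* Condensed sketch of the error path added above: a failure while creating the
 * Vulkan descriptor sets now unwinds the private-data store and returns the
 * HRESULT, instead of continuing with a half-initialised heap. */
static HRESULT descriptor_heap_init_sketch(struct d3d12_descriptor_heap *heap,
        struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc)
{
    HRESULT hr;

    if (FAILED(hr = vkd3d_private_store_init(&heap->private_store)))
        return hr;

    heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE);

    if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(heap, device, desc)))
    {
        vkd3d_private_store_destroy(&heap->private_store);
        return hr;
    }

    vkd3d_mutex_init(&heap->vk_sets_mutex);
    d3d12_device_add_ref(heap->device = device);
    return S_OK;
}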
- ---- - libs/vkd3d/include/private/vkd3d_common.h | 2 +- - libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 25 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 83 ++++- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 99 ++++-- - libs/vkd3d/libs/vkd3d-shader/fx.c | 21 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 6 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 53 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 46 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 234 ++++++++++--- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 117 +++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 144 ++++---- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 229 +++++++++---- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 317 ++++++++++++------ - .../libs/vkd3d-shader/vkd3d_shader_main.c | 30 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 11 +- - libs/vkd3d/libs/vkd3d/command.c | 136 ++++---- - libs/vkd3d/libs/vkd3d/device.c | 15 +- - libs/vkd3d/libs/vkd3d/resource.c | 8 - - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 2 + - 22 files changed, 1116 insertions(+), 467 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index a9d709d10fe..2d950b4f7aa 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -233,7 +233,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index f60ef7db769..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -20,6 +20,7 @@ - #define WIDL_C_INLINE_WRAPPERS - #endif - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include "vkd3d_blob.h" -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 9abc2c4db70..8a3eb5a367a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_PHASE ] = "phase", - [VKD3DSIH_PHI ] = "phi", - [VKD3DSIH_POW ] = "pow", -+ [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", -+ [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", -+ [VKD3DSIH_QUAD_READ_ACROSS_Y ] = "quad_read_across_y", -+ [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", - [VKD3DSIH_RCP ] = "rcp", - [VKD3DSIH_REP ] = "rep", - [VKD3DSIH_RESINFO ] = "resinfo", -@@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - { - bool untyped = false; - -- switch (compiler->current->handler_idx) -+ switch (compiler->current->opcode) - { - case VKD3DSIH_MOV: - case VKD3DSIH_MOVC: -@@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_BREAKP: - case VKD3DSIH_CONTINUEP: -@@ -1853,8 +1857,13 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - break; - - case VKD3DSIH_TEX: -- if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) -- vkd3d_string_buffer_printf(buffer, "p"); -+ if 
(vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) -+ { -+ if (ins->flags & VKD3DSI_TEXLD_PROJECT) -+ vkd3d_string_buffer_printf(buffer, "p"); -+ else if (ins->flags & VKD3DSI_TEXLD_BIAS) -+ vkd3d_string_buffer_printf(buffer, "b"); -+ } - break; - - case VKD3DSIH_WAVE_OP_ADD: -@@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - if (ins->coissue) - vkd3d_string_buffer_printf(buffer, "+"); - -- shader_print_opcode(compiler, ins->handler_idx); -+ shader_print_opcode(compiler, ins->opcode); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: -@@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_ENDIF: -@@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - - shader_dump_instruction(&compiler, ins); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_IF: -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index bfd5b52b436..2290385da76 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) - - static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { -- if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) -+ if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) - { - vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, - "Ignoring unexpected instruction flags %#x.", ins->flags); -@@ -1142,23 +1142,23 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - goto fail; - } - -- if (ins->handler_idx == VKD3DSIH_DCL) -+ if (ins->opcode == VKD3DSIH_DCL) - { - shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); - } -- else if (ins->handler_idx == VKD3DSIH_DEF) -+ else if (ins->opcode == VKD3DSIH_DEF) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); - shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } -- else if (ins->handler_idx == VKD3DSIH_DEFB) -+ else if (ins->opcode == VKD3DSIH_DEFB) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); - shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } -- else if (ins->handler_idx == VKD3DSIH_DEFI) -+ else if (ins->opcode == VKD3DSIH_DEFI) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); -@@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - return; - - fail: -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - *ptr = sm1->end; - } - -@@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(&sm1, ins); - -- if (ins->handler_idx == 
VKD3DSIH_INVALID) -+ if (ins->opcode == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - vsir_program_cleanup(program); -@@ -1739,11 +1739,11 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - } - else - { -- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); - put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ -- put_u32(buffer, 0); /* FIXME: default value */ -+ put_u32(buffer, 0); /* default value */ - } - } - -@@ -1767,6 +1767,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); -+ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset; -+ enum hlsl_regset regset; -+ -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ union -+ { -+ uint32_t u; -+ float f; -+ } uni; -+ -+ switch (comp_type->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ uni.u = 0; -+ break; -+ -+ case HLSL_TYPE_INT: -+ uni.f = var->default_values[k].value.i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ uni.f = var->default_values[k].value.u; -+ break; -+ -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ uni.u = var->default_values[k].value.u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); -+ } -+ } -+ } -+ - ++uniform_count; - } - } -@@ -2210,7 +2266,7 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - continue; - } - -- reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; -+ reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; - write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); - } - } -@@ -2515,7 +2571,7 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - struct sm1_instruction sm1_instr; - - sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); -- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; -+ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; - - sm1_instr = (struct sm1_instruction) - { -@@ -2546,6 +2602,11 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; - break; - -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ sm1_instr.opcode = D3DSIO_TEX; -+ sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; -+ break; -+ - default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 
73a8d8687c5..2176debc7d2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -458,6 +458,8 @@ enum dx_intrinsic_opcode - DX_WAVE_ACTIVE_OP = 119, - DX_WAVE_ACTIVE_BIT = 120, - DX_WAVE_PREFIX_OP = 121, -+ DX_QUAD_READ_LANE_AT = 122, -+ DX_QUAD_OP = 123, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, - DX_WAVE_ALL_BIT_COUNT = 135, -@@ -576,6 +578,13 @@ enum dxil_wave_op_kind - WAVE_OP_MAX = 3, - }; - -+enum dxil_quad_op_kind -+{ -+ QUAD_READ_ACROSS_X = 0, -+ QUAD_READ_ACROSS_Y = 1, -+ QUAD_READ_ACROSS_D = 2, -+}; -+ - struct sm6_pointer_info - { - const struct sm6_type *type; -@@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - for (i = 0; i < sm6->p.program->instructions.count; ++i) - { - ins = &sm6->p.program->instructions.elements[i]; -- if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) -+ if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) - { - ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( - (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); - } -- else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) -+ else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) - { - ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); - } -- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) -+ else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) - { - ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); - ins->flags = 0; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) -+ else if (ins->opcode == VKD3DSIH_DCL_TGSM_STRUCTURED) - { - ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); - ins->flags = 0; -@@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record - code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, -@@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co - return VKD3DSIH_IMAX; - case DX_IMIN: - return VKD3DSIH_IMIN; -+ case DX_QUAD_READ_LANE_AT: -+ return VKD3DSIH_QUAD_READ_LANE_AT; - case DX_UMAX: - return VKD3DSIH_UMAX; - case DX_UMIN: -@@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); - - /* NOP is used to flag no instruction emitted. 
*/ -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -@@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr - sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); - } - -+static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) -+{ -+ switch (op) -+ { -+ case QUAD_READ_ACROSS_X: -+ return VKD3DSIH_QUAD_READ_ACROSS_X; -+ case QUAD_READ_ACROSS_Y: -+ return VKD3DSIH_QUAD_READ_ACROSS_Y; -+ case QUAD_READ_ACROSS_D: -+ return VKD3DSIH_QUAD_READ_ACROSS_D; -+ default: -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_opcode opcode; -+ enum dxil_quad_op_kind quad_op; -+ -+ quad_op = sm6_value_get_constant_uint(operands[1]); -+ if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) -+ { -+ FIXME("Unhandled quad op kind %u.\n", quad_op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Quad op kind %u is unhandled.", quad_op); -+ return; -+ } -+ -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, - [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, - [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, -+ [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, -+ [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, - [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -6381,7 +6435,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade - { - const struct sm6_type *type; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - - if (!dst->type) - return; -@@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor - { - *dst = *value; - dst->type = type; -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - return; - } - -@@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - * do not otherwise occur, so deleting these avoids the need for backend support. 
*/ - if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) - { -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - *dst = *a; - return; - } -@@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record - reg->idx_count = 2; - dst->structure_stride = src->structure_stride; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record - incoming[j].block = sm6_function_get_block(function, record->operands[i + 1], sm6); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - - qsort(incoming, phi->incoming_count, sizeof(*incoming), phi_incoming_compare); - -@@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record - - code_block->terminator.type = TERMINATOR_RET; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec - terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - } - - ins = &code_block->instructions[code_block->instruction_count]; -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - - dst = sm6_parser_get_current_value(sm6); - fwd_type = dst->type; -@@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - - if (sm6->p.failed) - return VKD3D_ERROR; -- assert(ins->handler_idx != VKD3DSIH_INVALID); - - if (record->attachment) - metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); -@@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; - } - if (code_block) -- code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; -- else -- assert(ins->handler_idx == VKD3DSIH_NOP); -+ code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; - - if (dst->type && fwd_type && dst->type != fwd_type) - { -@@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - - if (!m) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; - ins->declaration.raw_resource.resource.reg.write_mask = 0; - return &ins->declaration.raw_resource.resource; - } -@@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - "A typed resource has no data type."); - } - -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; -+ ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; - for (i = 0; i < VKD3D_VEC4_SIZE; ++i) - ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; - ins->declaration.semantic.resource_type = resource_type; -@@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - } - else if (kind == RESOURCE_KIND_RAWBUFFER) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; - ins->declaration.raw_resource.resource.reg.write_mask = 0; - - return &ins->declaration.raw_resource.resource; - } - else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; - ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; - ins->declaration.structured_resource.resource.reg.write_mask = 0; - -@@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, - d->kind = kind; - d->reg_type = VKD3DSPR_RESOURCE; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; -- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) -+ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) - ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - - init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); -@@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, - d->kind = values[0]; - d->reg_type = VKD3DSPR_UAV; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; -- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) -+ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) - ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - - init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 57b4ac24212..3ba0a9ba994 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -494,7 +494,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - uint32_t semantic_offset, field_type_offset; - - name_offset = write_string(field->name, fx); -- semantic_offset = write_string(field->semantic.name, fx); -+ semantic_offset = write_string(field->semantic.raw_name, fx); - field_type_offset = write_type(field->type, fx); - - put_u32_unaligned(buffer, name_offset); -@@ -683,7 +683,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - } - - name_offset = write_string(name, fx); -- semantic_offset = write_string(semantic->name, fx); -+ semantic_offset = write_string(semantic->raw_name, fx); - - offset = put_u32(buffer, hlsl_sm1_base_type(type)); - put_u32(buffer, hlsl_sm1_class(type)); -@@ -794,6 +794,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: - /* FIXME: write actual initial value */ -+ if (var->default_values) -+ hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); -+ - offset = put_u32(buffer, 0); - - for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) -@@ -986,19 +989,18 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - }; - struct hlsl_ctx *ctx = fx->ctx; - -- /* Explicit bind point. */ -- if (var->reg_reservation.reg_type) -+ if (var->has_explicit_bind_point) - flags |= HAS_EXPLICIT_BIND_POINT; - - type_offset = write_type(var->data_type, fx); - name_offset = write_string(var->name, fx); -- semantic_offset = write_string(var->semantic.name, fx); -+ semantic_offset = write_string(var->semantic.raw_name, fx); - - put_u32(buffer, name_offset); - put_u32(buffer, type_offset); - - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ -- put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ -+ put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. 
*/ - value_offset = put_u32(buffer, 0); /* Default value offset */ - put_u32(buffer, flags); /* Flags */ - -@@ -1010,6 +1012,8 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - { - /* FIXME: write default value */ - set_u32(buffer, value_offset, 0); -+ if (var->default_values) -+ hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); - - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) -@@ -1344,7 +1348,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - - type_offset = write_type(var->data_type, fx); - name_offset = write_string(var->name, fx); -- semantic_offset = write_string(var->semantic.name, fx); -+ semantic_offset = write_string(var->semantic.raw_name, fx); - - put_u32(buffer, name_offset); - put_u32(buffer, type_offset); -@@ -1464,6 +1468,9 @@ static void write_buffers(struct fx_write_context *fx, bool shared) - { - struct hlsl_buffer *buffer; - -+ if (shared && !fx->child_effect) -+ return; -+ - LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) - { - if (!buffer->size && !fx->include_empty_buffers) -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 3e482a5fc70..8725724a239 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -48,9 +48,9 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne - static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - shader_glsl_print_indent(&gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); -+ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->opcode); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); -+ "Internal compiler error: Unhandled instruction %#x.", ins->opcode); - } - - static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, -@@ -74,7 +74,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator - { - generator->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 99214fba6de..c69af4e94bb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -167,6 +167,8 @@ void hlsl_free_var(struct hlsl_ir_var *decl) - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - vkd3d_free((void *)decl->objects_usage[k]); - -+ vkd3d_free(decl->default_values); -+ - for (i = 0; i < decl->state_block_count; ++i) - hlsl_free_state_block(decl->state_blocks[i]); - vkd3d_free(decl->state_blocks); -@@ -556,12 +558,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - - switch (type->class) - { -- case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -- case HLSL_CLASS_MATRIX: - offset[HLSL_REGSET_NUMERIC] += idx; - break; - -+ case HLSL_CLASS_MATRIX: -+ offset[HLSL_REGSET_NUMERIC] += 4 * idx; -+ break; -+ - case HLSL_CLASS_STRUCT: - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - offset[r] += type->e.record.fields[idx].reg_offset[r]; -@@ -592,6 +596,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case 
HLSL_CLASS_SCALAR: - vkd3d_unreachable(); - } - type = next_type; -@@ -1247,6 +1252,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha - list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); - else - list_add_tail(&ctx->globals->vars, &var->scope_entry); -+ var->is_synthetic = true; - } - return var; - } -@@ -3086,6 +3092,33 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - vkd3d_string_buffer_cleanup(&buffer); - } - -+void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) -+{ -+ unsigned int k, component_count = hlsl_type_component_count(var->data_type); -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ if (!var->default_values) -+ { -+ vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ return; -+ } -+ -+ vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); -+ for (k = 0; k < component_count; ++k) -+ { -+ if (k % 4 == 0) -+ vkd3d_string_buffer_printf(&buffer, "\n "); -+ vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].value.u); -+ } -+ vkd3d_string_buffer_printf(&buffer, "\n"); -+ -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+} -+ - void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) - { - struct hlsl_src *src, *next; -@@ -3319,9 +3352,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) - void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) - { - vkd3d_free((void *)semantic->name); -+ vkd3d_free((void *)semantic->raw_name); - memset(semantic, 0, sizeof(*semantic)); - } - -+bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) -+{ -+ *dst = *src; -+ dst->name = dst->raw_name = NULL; -+ if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) -+ return false; -+ if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) -+ { -+ hlsl_cleanup_semantic(dst); -+ return false; -+ } -+ -+ return true; -+} -+ - static void free_function_decl(struct hlsl_ir_function_decl *decl) - { - unsigned int i; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 27814f3a56f..179cc219e68 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -222,6 +222,8 @@ struct hlsl_semantic - const char *name; - uint32_t index; - -+ /* Name exactly as it appears in the sources. */ -+ const char *raw_name; - /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ - bool reported_missing; - /* In case the variable or field that stores this semantic has already reported to use a -@@ -259,8 +261,20 @@ struct hlsl_struct_field - * struct. */ - struct hlsl_reg - { -- /* Index of the first register allocated. */ -+ /* Register number of the first register allocated. */ - uint32_t id; -+ /* For descriptors (buffer, texture, sampler, UAV) this is the base binding -+ * index of the descriptor. -+ * For 5.1 and above descriptors have space and may be arrayed, in which -+ * case the array shares a single register ID but has a range of register -+ * indices, and "id" and "index" are as a rule not equal. -+ * For versions below 5.1, the register number for descriptors is the same -+ * as its external binding index, so only "index" is used, and "id" is -+ * ignored. 
-+ * For numeric registers "index" is not used. */ -+ uint32_t index; -+ /* Register space of a descriptor. Not used for numeric registers. */ -+ uint32_t space; - /* Number of registers to be allocated. - * Unlike the variable's type's regsize, it is not expressed in register components, but rather - * in whole registers, and may depend on which components are used within the shader. */ -@@ -396,6 +410,14 @@ struct hlsl_reg_reservation - unsigned int offset_index; - }; - -+union hlsl_constant_value_component -+{ -+ uint32_t u; -+ int32_t i; -+ float f; -+ double d; -+}; -+ - struct hlsl_ir_var - { - struct hlsl_type *data_type; -@@ -418,6 +440,15 @@ struct hlsl_ir_var - /* Scope that contains annotations for this variable. */ - struct hlsl_scope *annotations; - -+ /* Array of default values the variable was initialized with, one for each component. -+ * Only for variables that need it, such as uniforms and variables inside constant buffers. -+ * This pointer is NULL for others. */ -+ struct hlsl_default_value -+ { -+ /* Default value, in case the component is a numeric value. */ -+ union hlsl_constant_value_component value; -+ } *default_values; -+ - /* A dynamic array containing the state block on the variable's declaration, if any. - * An array variable may contain multiple state blocks. - * A technique pass will always contain one. -@@ -460,6 +491,8 @@ struct hlsl_ir_var - uint32_t is_uniform : 1; - uint32_t is_param : 1; - uint32_t is_separated_resource : 1; -+ uint32_t is_synthetic : 1; -+ uint32_t has_explicit_bind_point : 1; - }; - - /* This struct is used to represent assignments in state block entries: -@@ -775,13 +808,7 @@ struct hlsl_ir_constant - struct hlsl_ir_node node; - struct hlsl_constant_value - { -- union hlsl_constant_value_component -- { -- uint32_t u; -- int32_t i; -- float f; -- double d; -- } u[4]; -+ union hlsl_constant_value_component u[4]; - } value; - /* Constant register of type 'c' where the constant value is stored for SM1. 
*/ - struct hlsl_reg reg; -@@ -1249,6 +1276,7 @@ void hlsl_block_cleanup(struct hlsl_block *block); - bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -+void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); - - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -@@ -1259,7 +1287,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); - - void hlsl_cleanup_deref(struct hlsl_deref *deref); -+ - void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); -+bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); - - void hlsl_cleanup_ir_switch_cases(struct list *cases); - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index a5923d8bf8e..91418775e1b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -49,7 +49,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); - RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum - RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public - RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try --RESERVED4 typename|union|unsigned|using|virtual -+RESERVED4 typename|union|using|virtual - - WS [ \t] - NEWLINE (\n)|(\r\n) -@@ -164,6 +164,7 @@ textureCUBE {return KW_TEXTURECUBE; } - TextureCubeArray {return KW_TEXTURECUBEARRAY; } - true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } -+unsigned {return KW_UNSIGNED; } - uniform {return KW_UNIFORM; } - vector {return KW_VECTOR; } - VertexShader {return KW_VERTEXSHADER; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 9c1bdef926d..a2c89b15e4a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1013,6 +1013,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); - } - } -+ -+ if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) -+ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); -+ - vkd3d_free(v->arrays.sizes); - field->loc = v->loc; - field->name = v->name; -@@ -1286,13 +1290,13 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return block; - } - --static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -- const struct vkd3d_shader_location *loc) -+static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) - { -+ union hlsl_constant_value_component ret = {0}; - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; - struct hlsl_block expr; -- unsigned int ret = 0; - struct hlsl_src src; - - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -@@ -1305,29 +1309,32 @@ static unsigned int 
evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - case HLSL_IR_LOAD: - case HLSL_IR_INDEX: - continue; -+ case HLSL_IR_STORE: -+ if (hlsl_ir_store(node)->lhs.var->is_synthetic) -+ break; -+ /* fall-through */ - case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_LOOP: - case HLSL_IR_JUMP: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_RESOURCE_STORE: -- case HLSL_IR_STORE: - case HLSL_IR_SWITCH: - case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); -+ break; - } - } - - if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -- return 0; -+ return ret; - hlsl_block_add_block(&expr, block); - -- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) - { - hlsl_block_cleanup(&expr); -- return 0; -+ return ret; - } - - /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ -@@ -1339,7 +1346,7 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); -- ret = constant->value.u[0].u; -+ ret = constant->value.u[0]; - } - else - { -@@ -1352,6 +1359,15 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - return ret; - } - -+static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) -+{ -+ union hlsl_constant_value_component res; -+ -+ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ return res.u; -+} -+ - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { - /* Scalar vars can be converted to pretty much everything */ -@@ -2067,6 +2083,50 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - return true; - } - -+/* For some reason, for matrices, values from default value initializers end up in different -+ * components than from regular initializers. Default value initializers fill the matrix in -+ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -+ * (top-to-bottom left-to-right), so they have to be adjusted. 
*/ -+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, -+ struct hlsl_type *type, unsigned int index) -+{ -+ unsigned int element_comp_count, element, x, y, i; -+ unsigned int base = 0; -+ -+ if (ctx->profile->major_version < 4) -+ return index; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_MATRIX: -+ x = index / type->dimy; -+ y = index % type->dimy; -+ return y * type->dimx + x; -+ -+ case HLSL_CLASS_ARRAY: -+ element_comp_count = hlsl_type_component_count(type->e.array.type); -+ element = index / element_comp_count; -+ base = element * element_comp_count; -+ return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ struct hlsl_type *field_type = type->e.record.fields[i].type; -+ -+ element_comp_count = hlsl_type_component_count(field_type); -+ if (index - base < element_comp_count) -+ return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); -+ base += element_comp_count; -+ } -+ break; -+ -+ default: -+ return index; -+ } -+ vkd3d_unreachable(); -+} -+ - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) - { -@@ -2087,12 +2147,29 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -- return; -+ if (dst->default_values) -+ { -+ struct hlsl_default_value default_value = {0}; -+ unsigned int dst_index; - -- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -- return; -- hlsl_block_add_block(instrs, &block); -+ if (!hlsl_clone_block(ctx, &block, instrs)) -+ return; -+ default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); -+ -+ dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ dst->default_values[dst_index] = default_value; -+ -+ hlsl_block_cleanup(&block); -+ } -+ else -+ { -+ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -+ return; -+ -+ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -+ return; -+ hlsl_block_add_block(instrs, &block); -+ } - - ++*store_index; - } -@@ -2246,17 +2323,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - } - } - -+ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -+ { -+ /* SM 5.1/6.x descriptor arrays act differently from previous versions. -+ * Not only are they treated as a single object in reflection, but they -+ * act as a single component for the purposes of assignment and -+ * initialization. 
*/ -+ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); -+ } -+ - if (!(var_name = vkd3d_strdup(v->name))) - return; - -- new_semantic = v->semantic; -- if (v->semantic.name) -+ if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) - { -- if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) -- { -- vkd3d_free(var_name); -- return; -- } -+ vkd3d_free(var_name); -+ return; - } - - if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) -@@ -2348,6 +2430,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - { - struct parse_variable_def *v, *v_next; - struct hlsl_block *initializers; -+ unsigned int component_count; - struct hlsl_ir_var *var; - struct hlsl_type *type; - -@@ -2371,6 +2454,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - } - - type = var->data_type; -+ component_count = hlsl_type_component_count(type); - - var->state_blocks = v->state_blocks; - var->state_block_count = v->state_block_count; -@@ -2379,51 +2463,70 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - v->state_block_capacity = 0; - v->state_blocks = NULL; - -- if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) -+ if (var->state_blocks && component_count != var->state_block_count) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u state blocks, but got %u.", -- hlsl_type_component_count(type), var->state_block_count); -+ "Expected %u state blocks, but got %u.", component_count, var->state_block_count); - free_parse_variable_def(v); - continue; - } - - if (v->initializer.args_count) - { -- if (v->initializer.braces) -- { -- unsigned int size = initializer_size(&v->initializer); -- unsigned int store_index = 0; -- unsigned int k; -+ unsigned int store_index = 0; -+ bool is_default_values_initializer; -+ unsigned int size, k; -+ -+ is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) -+ || (var->storage_modifiers & HLSL_STORAGE_UNIFORM); - -- if (hlsl_type_component_count(type) != size) -+ if (is_default_values_initializer) -+ { -+ assert(!var->default_values); -+ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) - { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in initializer, but got %u.", -- hlsl_type_component_count(type), size); - free_parse_variable_def(v); - continue; - } -+ } - -- for (k = 0; k < v->initializer.args_count; ++k) -+ if (!v->initializer.braces) -+ { -+ if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) - { -- initialize_var_components(ctx, v->initializer.instrs, var, -- &store_index, v->initializer.args[k]); -+ free_parse_variable_def(v); -+ continue; - } -+ -+ v->initializer.args[0] = node_from_block(v->initializer.instrs); - } -- else -+ -+ size = initializer_size(&v->initializer); -+ if (component_count != size) - { -- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", component_count, size); -+ free_parse_variable_def(v); -+ continue; -+ } - -- assert(v->initializer.args_count == 1); -- hlsl_block_add_instr(v->initializer.instrs, &load->node); -- add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, 
v->initializer.args[0]); -+ for (k = 0; k < v->initializer.args_count; ++k) -+ { -+ initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); - } - -- if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ if (is_default_values_initializer) -+ { -+ hlsl_dump_var_default_values(var); -+ } -+ else if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ { - hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); -+ } - else -+ { - hlsl_block_add_block(initializers, v->initializer.instrs); -+ } - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -4061,11 +4164,15 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - hlsl_release_string_buffer(ctx, string); - } - -- if (!strcmp(name, "tex2Dlod")) -+ if (!strcmp(name, "tex2Dbias") -+ || !strcmp(name, "tex2Dlod")) - { - struct hlsl_ir_node *lod, *c; - -- load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ if (!strcmp(name, "tex2Dlod")) -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ else -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - - if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) - return false; -@@ -4187,6 +4294,12 @@ static bool intrinsic_tex2D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); - } - -+static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); -+} -+ - static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4407,6 +4520,7 @@ intrinsic_functions[] = - {"tanh", 1, true, intrinsic_tanh}, - {"tex1D", -1, false, intrinsic_tex1D}, - {"tex2D", -1, false, intrinsic_tex2D}, -+ {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, - {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, - {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, - {"tex3D", -1, false, intrinsic_tex3D}, -@@ -5566,6 +5680,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_TEXTURECUBEARRAY - %token KW_TRUE - %token KW_TYPEDEF -+%token KW_UNSIGNED - %token KW_UNIFORM - %token KW_VECTOR - %token KW_VERTEXSHADER -@@ -5717,8 +5832,7 @@ hlsl_prog: - | hlsl_prog buffer_declaration buffer_body - | hlsl_prog declaration_statement - { -- if (!list_empty(&$2->instrs)) -- hlsl_fixme(ctx, &@2, "Uniform initializer."); -+ hlsl_block_add_block(&ctx->static_initializers, $2); - destroy_block($2); - } - | hlsl_prog preproc_directive -@@ -6315,6 +6429,9 @@ semantic: - { - char *p; - -+ if (!($$.raw_name = hlsl_strdup(ctx, $2))) -+ YYABORT; -+ - for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) - ; - $$.name = $2; -@@ -6451,6 +6568,9 @@ parameter: - } - $$.type = type; - -+ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -+ hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); -+ - $$.name = $3; - $$.semantic = $5.semantic; - $$.reg_reservation = $5.reg_reservation; -@@ -6713,6 +6833,26 @@ type_no_void: - } - vkd3d_free($1); - } -+ | KW_UNSIGNED TYPE_IDENTIFIER -+ { -+ struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); -+ -+ if (hlsl_is_numeric_type(type) && type->e.numeric.type == HLSL_TYPE_INT) -+ { -+ if (!(type = hlsl_type_clone(ctx, type, 0, 0))) -+ YYABORT; -+ vkd3d_free((void *)type->name); -+ type->name = NULL; -+ 
type->e.numeric.type = HLSL_TYPE_UINT; -+ } -+ else -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "The 'unsigned' keyword can't be used with type %s.", $2); -+ } -+ -+ $$ = type; -+ } - | KW_STRUCT TYPE_IDENTIFIER - { - $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); -@@ -7156,6 +7296,8 @@ var_modifiers: - } - | var_identifier var_modifiers - { -+ $$ = $2; -+ - if (!strcmp($1, "precise")) - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); - else if (!strcmp($1, "single")) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index bdb72a1fab9..09066a6191a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -218,6 +218,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, - uniform->is_uniform = 1; - uniform->is_param = temp->is_param; - uniform->buffer = temp->buffer; -+ if (temp->default_values) -+ { -+ /* Transfer default values from the temp to the uniform. */ -+ assert(!uniform->default_values); -+ assert(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); -+ uniform->default_values = temp->default_values; -+ temp->default_values = NULL; -+ } - - if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) - return; -@@ -312,7 +320,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - } - } - -- if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) -+ if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) - { - vkd3d_free(new_name); - return NULL; -@@ -3815,15 +3823,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -+ const struct hlsl_reg_reservation *reservation = &var->reg_reservation; - unsigned int r; - -- if (var->reg_reservation.reg_type) -+ if (reservation->reg_type) - { - for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) - { - if (var->regs[r].allocation_size > 0) - { -- if (var->reg_reservation.reg_type != get_regset_name(r)) -+ if (reservation->reg_type != get_regset_name(r)) - { - struct vkd3d_string_buffer *type_string; - -@@ -3839,10 +3848,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - else - { - var->regs[r].allocated = true; -- var->regs[r].id = var->reg_reservation.reg_index; -- TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index + var->regs[r].allocation_size); -+ var->regs[r].space = reservation->reg_space; -+ var->regs[r].index = reservation->reg_index; - } - } - } -@@ -4762,13 +4769,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) - } - } - --static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) -+static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) - { - const struct hlsl_buffer *buffer; - - LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) - { -- if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) -+ if (buffer->used_size && buffer->reservation.reg_type == 'b' -+ && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) - return buffer; - } - return NULL; -@@ -4783,6 +4791,7 @@ static void hlsl_calculate_buffer_offset(struct 
hlsl_ctx *ctx, struct hlsl_ir_va - if (register_reservation) - { - var->buffer_offset = 4 * var->reg_reservation.reg_index; -+ var->has_explicit_bind_point = 1; - } - else - { -@@ -4815,6 +4824,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - } - } - var->buffer_offset = var->reg_reservation.offset_index; -+ var->has_explicit_bind_point = 1; - } - else - { -@@ -4916,8 +4926,8 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) - static void allocate_buffers(struct hlsl_ctx *ctx) - { - struct hlsl_buffer *buffer; -+ uint32_t index = 0, id = 0; - struct hlsl_ir_var *var; -- uint32_t index = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -4938,32 +4948,48 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - if (buffer->type == HLSL_BUFFER_CONSTANT) - { -- if (buffer->reservation.reg_type == 'b') -+ const struct hlsl_reg_reservation *reservation = &buffer->reservation; -+ -+ if (reservation->reg_type == 'b') - { -- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); -+ const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, -+ reservation->reg_space, reservation->reg_index); - - if (reserved_buffer && reserved_buffer != buffer) - { - hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); -+ "Multiple buffers bound to space %u, index %u.", -+ reservation->reg_space, reservation->reg_index); - hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, -- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); -+ "Buffer %s is already bound to space %u, index %u.", -+ reserved_buffer->name, reservation->reg_space, reservation->reg_index); - } - -- buffer->reg.id = buffer->reservation.reg_index; -+ buffer->reg.space = reservation->reg_space; -+ buffer->reg.index = reservation->reg_index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ buffer->reg.id = id++; -+ else -+ buffer->reg.id = buffer->reg.index; - buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; -- TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); -+ TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", -+ buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); - } -- else if (!buffer->reservation.reg_type) -+ else if (!reservation->reg_type) - { -- while (get_reserved_buffer(ctx, index)) -+ while (get_reserved_buffer(ctx, 0, index)) - ++index; - -- buffer->reg.id = index; -+ buffer->reg.space = 0; -+ buffer->reg.index = index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ buffer->reg.id = id++; -+ else -+ buffer->reg.id = buffer->reg.index; - buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; -- TRACE("Allocated %s to cb%u.\n", buffer->name, index); -+ TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); - ++index; - } - else -@@ -4980,7 +5006,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, -- uint32_t index, bool allocated_only) -+ uint32_t space, uint32_t index, bool allocated_only) - { - const struct hlsl_ir_var *var; - unsigned int start, count; -@@ -4995,12 +5021,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - start = var->reg_reservation.reg_index; - count = var->data_type->reg_size[regset]; 
- -+ if (var->reg_reservation.reg_space != space) -+ continue; -+ - if (!var->regs[regset].allocated && allocated_only) - continue; - } - else if (var->regs[regset].allocated) - { -- start = var->regs[regset].id; -+ if (var->regs[regset].space != space) -+ continue; -+ -+ start = var->regs[regset].index; - count = var->regs[regset].allocation_size; - } - else -@@ -5017,8 +5049,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - { - char regset_name = get_regset_name(regset); -+ uint32_t min_index = 0, id = 0; - struct hlsl_ir_var *var; -- uint32_t min_index = 0; - - if (regset == HLSL_REGSET_UAVS) - { -@@ -5041,35 +5073,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - if (var->regs[regset].allocated) - { - const struct hlsl_ir_var *reserved_object, *last_reported = NULL; -- unsigned int index, i; -+ unsigned int i; - -- if (var->regs[regset].id < min_index) -+ if (var->regs[regset].index < min_index) - { - assert(regset == HLSL_REGSET_UAVS); - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "UAV index (%u) must be higher than the maximum render target index (%u).", -- var->regs[regset].id, min_index - 1); -+ var->regs[regset].index, min_index - 1); - continue; - } - - for (i = 0; i < count; ++i) - { -- index = var->regs[regset].id + i; -+ unsigned int space = var->regs[regset].space; -+ unsigned int index = var->regs[regset].index + i; - - /* get_allocated_object() may return "var" itself, but we - * actually want that, otherwise we'll end up reporting the - * same conflict between the same two variables twice. */ -- reserved_object = get_allocated_object(ctx, regset, index, true); -+ reserved_object = get_allocated_object(ctx, regset, space, index, true); - if (reserved_object && reserved_object != var && reserved_object != last_reported) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple variables bound to %c%u.", regset_name, index); -+ "Multiple variables bound to space %u, %c%u.", regset_name, space, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, -- "Variable '%s' is already bound to %c%u.", reserved_object->name, -- regset_name, index); -+ "Variable '%s' is already bound to space %u, %c%u.", -+ reserved_object->name, regset_name, space, index); - last_reported = reserved_object; - } - } -+ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ var->regs[regset].id = id++; -+ else -+ var->regs[regset].id = var->regs[regset].index; -+ TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", -+ var->name, var->regs[regset].space, regset_name, var->regs[regset].index, -+ regset_name, var->regs[regset].index + count, var->regs[regset].id); - } - else - { -@@ -5078,7 +5119,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - while (available < count) - { -- if (get_allocated_object(ctx, regset, index, false)) -+ if (get_allocated_object(ctx, regset, 0, index, false)) - available = 0; - else - ++available; -@@ -5086,10 +5127,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - } - index -= count; - -- var->regs[regset].id = index; -+ var->regs[regset].space = 0; -+ var->regs[regset].index = index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ var->regs[regset].id = id++; -+ else -+ var->regs[regset].id = var->regs[regset].index; - var->regs[regset].allocated = true; -- 
TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, -- index + count); -+ TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, -+ regset_name, index, regset_name, index + count, var->regs[regset].id); - ++index; - } - } -@@ -5295,6 +5341,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - assert(deref->data_type); - assert(hlsl_is_numeric_type(deref->data_type)); - -+ ret.index += offset / 4; - ret.id += offset / 4; - - ret.writemask = 0xf & (0xf << (offset % 4)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b3b745fc1b2..5f99be632f2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -46,9 +46,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade - - static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) -- || handler_idx == VKD3DSIH_HS_DECLS; -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) -+ || opcode == VKD3DSIH_HS_DECLS; - } - - static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) -@@ -60,9 +60,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i - - static bool vsir_instruction_init_with_params(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) -+ enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) - { -- vsir_instruction_init(ins, location, handler_idx); -+ vsir_instruction_init(ins, location, opcode); - ins->dst_count = dst_count; - ins->src_count = src_count; - -@@ -287,7 +287,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - mul_ins = &instructions->elements[pos]; - add_ins = &instructions->elements[pos + 1]; - -- mul_ins->handler_idx = VKD3DSIH_MUL; -+ mul_ins->opcode = VKD3DSIH_MUL; - mul_ins->src_count = 2; - - if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) -@@ -322,7 +322,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - { - struct vkd3d_shader_instruction *ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_IFC: - if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) -@@ -492,26 +492,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - struct shader_phase_location *loc; - bool b; - -- if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -+ if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) - { - b = flattener_is_in_fork_or_join_phase(normaliser); - /* Reset the phase info. */ - normaliser->phase_body_idx = ~0u; -- normaliser->phase = ins->handler_idx; -+ normaliser->phase = ins->opcode; - normaliser->instance_count = 1; - /* Leave the first occurrence and delete the rest. 
*/ - if (b) - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -- || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) -+ else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -+ || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) - { - normaliser->instance_count = ins->declaration.count + !ins->declaration.count; - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( -+ else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( - &ins->declaration.dst.reg)) - { - vkd3d_shader_instruction_make_nop(ins); -@@ -524,7 +524,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - if (normaliser->phase_body_idx == ~0u) - normaliser->phase_body_idx = index; - -- if (ins->handler_idx == VKD3DSIH_RET) -+ if (ins->opcode == VKD3DSIH_RET) - { - normaliser->last_ret_location = ins->location; - vkd3d_shader_instruction_make_nop(ins); -@@ -679,11 +679,11 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 - } - - void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx) -+ enum vkd3d_shader_opcode opcode) - { - memset(ins, 0, sizeof(*ins)); - ins->location = *location; -- ins->handler_idx = handler_idx; -+ ins->opcode = opcode; - } - - static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, -@@ -865,12 +865,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - { - ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser.phase = ins->handler_idx; -+ normaliser.phase = ins->opcode; - break; - default: - if (vsir_instruction_is_dcl(ins)) -@@ -888,7 +888,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - { - ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - input_control_point_count = ins->declaration.count; -@@ -1526,7 +1526,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - struct vkd3d_shader_register *reg; - unsigned int i; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT: - if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -@@ -1560,7 +1560,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser->phase = ins->handler_idx; -+ normaliser->phase = ins->opcode; - memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); - memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); - memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); -@@ -1594,7 +1594,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - { - ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - normaliser.output_control_point_count = ins->declaration.count; -@@ -1608,7 +1608,7 @@ static enum vkd3d_result 
vsir_program_normalise_io_registers(struct vsir_program - /* fall through */ - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser.phase = ins->handler_idx; -+ normaliser.phase = ins->opcode; - break; - default: - break; -@@ -1740,7 +1740,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) -+ if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) - { - struct flat_constant_def *def; - -@@ -1779,7 +1779,7 @@ static void remove_dead_code(struct vsir_program *program) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_IF: - case VKD3DSIH_LOOP: -@@ -1799,7 +1799,7 @@ static void remove_dead_code(struct vsir_program *program) - { - if (depth > 0) - { -- if (ins->handler_idx != VKD3DSIH_ELSE) -+ if (ins->opcode != VKD3DSIH_ELSE) - --depth; - vkd3d_shader_instruction_make_nop(ins); - } -@@ -1870,14 +1870,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 3); - -- ins->handler_idx = VKD3DSIH_SAMPLE; -+ ins->opcode = VKD3DSIH_SAMPLE; - - srcs[0] = ins->src[0]; - -@@ -1919,7 +1919,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " -- "Combined sampler instruction %#x.", ins->handler_idx); -+ "Combined sampler instruction %#x.", ins->opcode); - return VKD3D_ERROR_NOT_IMPLEMENTED; - - default: -@@ -2030,7 +2030,7 @@ static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, - { - struct vkd3d_shader_instruction *dst_ins; - -- if (instruction->handler_idx == VKD3DSIH_NOP) -+ if (instruction->opcode == VKD3DSIH_NOP) - return true; - - if (!(dst_ins = cf_flattener_require_space(flattener, 1))) -@@ -2245,9 +2245,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - * phase instruction, and in all other shader types begins with the first label instruction. - * Declaring an indexable temp with function scope is not considered a declaration, - * because it needs to live inside a function. */ -- if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) -+ if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) - { -- bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP -+ bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP - && instruction->declaration.indexable_temp.has_function_scope; - - if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) -@@ -2260,14 +2260,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - cf_info = flattener->control_flow_depth - ? 
&flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - if (!cf_flattener_copy_instruction(flattener, instruction)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) -+ if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) - after_declarations_section = false; - break; - -@@ -2662,7 +2662,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - unsigned int case_count, j, default_label; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); -@@ -2858,7 +2858,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - - /* Only phi src/dst SSA values need be converted here. Structurisation may - * introduce new cases of undominated SSA use, which will be handled later. */ -- if (ins->handler_idx != VKD3DSIH_PHI) -+ if (ins->opcode != VKD3DSIH_PHI) - continue; - ++phi_count; - -@@ -2907,7 +2907,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); -@@ -3336,7 +3336,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) - if (block->label == 0) - continue; - -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_RET: - shape = "trapezium"; -@@ -3478,7 +3478,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; - bool finish = false; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: -@@ -3533,7 +3533,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - if (block->label == 0) - continue; - -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_RET: - break; -@@ -4192,7 +4192,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - structure->u.block = block; - - /* Generate between zero and two jump instructions. 
*/ -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_BRANCH: - { -@@ -5049,7 +5049,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -@@ -5064,7 +5064,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -- TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); -+ TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); - target.instructions[target.ins_count++] = *ins; - ++i; - if ((ret = vsir_program_structurize_function(program, message_context, -@@ -5222,7 +5222,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -@@ -5237,7 +5237,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -- TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); -+ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); - ++i; - if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, &i)) < 0) -@@ -5641,7 +5641,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, - if (instruction->dst_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, - "Invalid destination count %u for an instruction of type %#x, expected %u.", -- instruction->dst_count, instruction->handler_idx, count); -+ instruction->dst_count, instruction->opcode, count); - } - - static void vsir_validate_src_count(struct validation_context *ctx, -@@ -5650,7 +5650,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, - if (instruction->src_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - } - - static bool vsir_validate_src_min_count(struct validation_context *ctx, -@@ -5660,7 +5660,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at least %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - return false; - } - -@@ -5674,7 +5674,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at most %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - return false; - } - -@@ -5701,7 +5701,7 @@ 
static void vsir_validate_cf_type(struct validation_context *ctx, - assert(expected_type != CF_TYPE_UNKNOWN); - if (ctx->cf_type != expected_type) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", -- instruction->handler_idx, name_from_cf_type(ctx->cf_type)); -+ instruction->opcode, name_from_cf_type(ctx->cf_type)); - } - - static void vsir_validate_instruction(struct validation_context *ctx) -@@ -5718,13 +5718,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) - for (i = 0; i < instruction->src_count; ++i) - vsir_validate_src_param(ctx, &instruction->src[i]); - -- if (instruction->handler_idx >= VKD3DSIH_INVALID) -+ if (instruction->opcode >= VKD3DSIH_INVALID) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", -- instruction->handler_idx); -+ instruction->opcode); - } - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_HS_CONTROL_POINT_PHASE: -@@ -5733,12 +5733,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (version->type != VKD3D_SHADER_TYPE_HULL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -+ "Phase instruction %#x is only valid in a hull shader.", -+ instruction->opcode); - if (ctx->depth != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", -- instruction->handler_idx); -- ctx->phase = instruction->handler_idx; -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Phase instruction %#x must appear to top level.", -+ instruction->opcode); -+ ctx->phase = instruction->opcode; - ctx->dcl_temps_found = false; - return; - -@@ -5812,7 +5814,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", -- instruction->handler_idx); -+ instruction->opcode); - - /* We support two different control flow types in shaders: - * block-based, like DXIL and SPIR-V, and structured, like D3DBC -@@ -5824,7 +5826,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - * block, but need for that hasn't arisen yet, so we don't. 
*/ - if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) - { -- if (instruction->handler_idx == VKD3DSIH_LABEL) -+ if (instruction->opcode == VKD3DSIH_LABEL) - ctx->cf_type = CF_TYPE_BLOCKS; - else - ctx->cf_type = CF_TYPE_STRUCTURED; -@@ -5832,7 +5834,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - - if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) - { -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_LABEL: - if (ctx->inside_block) -@@ -5844,20 +5846,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) - case VKD3DSIH_BRANCH: - case VKD3DSIH_SWITCH_MONOLITHIC: - if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); - ctx->inside_block = false; - break; - - default: - if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); - break; - } - } - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_TEMPS: - vsir_validate_dst_count(ctx, instruction, 0); -@@ -5877,7 +5881,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_IFC: -@@ -5896,7 +5900,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); - else -- ctx->blocks[ctx->depth - 1] = instruction->handler_idx; -+ ctx->blocks[ctx->depth - 1] = instruction->opcode; - break; - - case VKD3DSIH_ENDIF: -@@ -5915,7 +5919,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 
2 : 0); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDLOOP: -@@ -5934,7 +5938,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDREP: -@@ -5953,7 +5957,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDSWITCH: -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index be50d3b9020..a3cdbe559a7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -20,6 +20,7 @@ - - %{ - -+#include "preproc.h" - #include "preproc.tab.h" - - #undef ERROR /* defined in wingdi.h */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 984a4f894f6..524fb8e9b1f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu - return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); - } - -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t op_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t index_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, index_id); -+} -+ - static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t val_id) - { -@@ -6831,7 +6847,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - uint32_t function_id, void_id, function_type_id; - struct vkd3d_shader_phase *phase; - -- assert(compiler->phase != instruction->handler_idx); -+ assert(compiler->phase != instruction->opcode); - - if (!is_in_default_phase(compiler)) - spirv_compiler_leave_shader_phase(compiler); -@@ -6843,16 +6859,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_function(builder, void_id, function_id, - SpvFunctionControlMaskNone, function_type_id); - -- compiler->phase = instruction->handler_idx; -+ 
compiler->phase = instruction->opcode; - spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - -- phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ phase = (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - ? &compiler->control_point_phase : &compiler->patch_constant_phase; - phase->function_id = function_id; - /* The insertion location must be set after the label is emitted. */ - phase->function_location = 0; - -- if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - compiler->emit_default_control_point_phase = instruction->flags; - } - -@@ -7016,7 +7032,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp spirv_op; - } - alu_ops[] = -@@ -7056,7 +7072,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - - for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) - { -- if (alu_ops[i].handler_idx == instruction->handler_idx) -+ if (alu_ops[i].opcode == instruction->opcode) - return alu_ops[i].spirv_op; - } - -@@ -7065,7 +7081,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - - static SpvOp spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) - { -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_AND: - return SpvOpLogicalAnd; -@@ -7090,20 +7106,20 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) - { -- val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); -+ val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); - } - else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) - { - /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ -- val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); -+ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); - } - else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) - { -- val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); -+ val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); - } - else if (dst->reg.data_type == VKD3D_DATA_UINT64) - { -- val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); -+ val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); - } - else - { -@@ -7126,7 +7142,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - SpvOp op = SpvOpMax; - unsigned int i; - -- if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) -+ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) - { - /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ - FIXME("Unsupported 64-bit source for bit count.\n"); -@@ -7142,8 +7158,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - /* VSIR supports logic ops AND/OR/XOR on bool values. */ - op = spirv_compiler_map_logical_instruction(instruction); - } -- else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF -- || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) -+ else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF -+ || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) - { - /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, - * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. */ -@@ -7158,9 +7174,9 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - - if (op == SpvOpMax) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, -- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); -+ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); - return VKD3D_ERROR_INVALID_SHADER; - } - -@@ -7179,8 +7195,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - * Microsoft fxc will compile immediate constants larger than 5 bits. - * Fixing up the constants would be more elegant, but the simplest way is - * to let this handle constants too. */ -- if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL -- || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) -+ if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL -+ || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) - { - uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, - VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); -@@ -7218,7 +7234,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - enum GLSLstd450 glsl_inst; - } - glsl_insts[] = -@@ -7258,7 +7274,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - - for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) - { -- if (glsl_insts[i].handler_idx == instruction->handler_idx) -+ if (glsl_insts[i].opcode == instruction->opcode) - return glsl_insts[i].glsl_inst; - } - -@@ -7276,20 +7292,20 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - unsigned int i, component_count; - enum GLSLstd450 glsl_inst; - -- if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) -+ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI -+ || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) - { - /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ -- FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); -+ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -- "64-bit source for handler %#x is not supported.", instruction->handler_idx); -+ "64-bit source for handler %#x is not supported.", instruction->opcode); - return; - } - - glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); - if (glsl_inst == GLSLstd450Bad) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7306,8 +7322,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, - instr_set_id, glsl_inst, src_id, instruction->src_count); - -- if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) -+ if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI -+ || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) - { - /* In D3D bits are numbered from the most significant bit. */ - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -7415,7 +7431,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, - - if (src[0].reg.data_type != VKD3D_DATA_BOOL) - { -- if (instruction->handler_idx == VKD3DSIH_CMP) -+ if (instruction->opcode == VKD3DSIH_CMP) - condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, - spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); -@@ -7469,9 +7485,9 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, - component_count = vsir_write_mask_component_count(dst->write_mask); - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - -- if (instruction->handler_idx == VKD3DSIH_DP4) -+ if (instruction->opcode == VKD3DSIH_DP4) - write_mask = VKD3DSP_WRITEMASK_ALL; -- else if (instruction->handler_idx == VKD3DSIH_DP3) -+ else if (instruction->opcode == VKD3DSIH_DP3) - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; - else - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; -@@ -7606,8 +7622,8 @@ static void spirv_compiler_emit_int_div(struct spirv_compiler *compiler, - unsigned int component_count = 0; - SpvOp div_op, mod_op; - -- div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; -- mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; -+ div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; -+ mod_op = instruction->opcode == VKD3DSIH_IDIV ? 
SpvOpSRem : SpvOpUMod; - - if (dst[0].reg.type != VKD3DSPR_NULL) - { -@@ -7778,13 +7794,13 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp - mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); - size_id = spirv_compiler_get_constant_uint(compiler, size); - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; - case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; - case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7895,7 +7911,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - unsigned int component_count; - SpvOp op; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DEQO: - case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; -@@ -7916,7 +7932,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; - case VKD3DSIH_ULT: op = SpvOpULessThan; break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7949,7 +7965,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c - src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); - src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); - val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); -- if (instruction->handler_idx == VKD3DSIH_ORD) -+ if (instruction->opcode == VKD3DSIH_ORD) - val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } -@@ -7964,7 +7980,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil - unsigned int component_count; - SpvOp op; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; - case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; -@@ -8262,7 +8278,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - - static const struct instruction_info - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp op; - bool needs_derivative_control; - } -@@ -8279,7 +8295,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - info = NULL; - for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) - { -- if (deriv_instructions[i].handler_idx == instruction->handler_idx) -+ if (deriv_instructions[i].opcode == instruction->opcode) - { - info = &deriv_instructions[i]; - break; -@@ -8287,7 +8303,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - } - if (!info) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -8497,7 +8513,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, - uint32_t coordinate_mask; - bool multisample; - -- multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; -+ multisample = instruction->opcode == VKD3DSIH_LD2DMS; - - spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); - -@@ -8576,7 +8592,7 @@ static void 
spirv_compiler_emit_sample(struct spirv_compiler *compiler, - spirv_compiler_prepare_image(compiler, &image, - &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_SAMPLE: - op = SpvOpImageSampleImplicitLod; -@@ -8603,7 +8619,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - &src[3], VKD3DSP_WRITEMASK_0); - break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -8637,7 +8653,7 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, - uint32_t image_operands[2]; - SpvOp op; - -- if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) -+ if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) - { - op = SpvOpImageSampleDrefExplicitLod; - operands_mask |= SpvImageOperandsLodMask; -@@ -8687,12 +8703,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - uint32_t coordinate_mask; - bool extended_offset; - -- if (instruction->handler_idx == VKD3DSIH_GATHER4_C -- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) -+ if (instruction->opcode == VKD3DSIH_GATHER4_C -+ || instruction->opcode == VKD3DSIH_GATHER4_PO_C) - image_flags |= VKD3D_IMAGE_FLAG_DEPTH; - -- extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO -- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; -+ extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO -+ || instruction->opcode == VKD3DSIH_GATHER4_PO_C; - - addr = &src[0]; - offset = extended_offset ? &src[1] : NULL; -@@ -8963,7 +8979,6 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * - { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); -- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -@@ -9007,7 +9022,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); -- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -@@ -9145,7 +9159,7 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c - uint32_t operands[3]; - SpvOp op; - -- op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC -+ op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC - ? 
SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; - - resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); -@@ -9211,7 +9225,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp spirv_op; - } - atomic_ops[] = -@@ -9240,16 +9254,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins - - for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) - { -- if (atomic_ops[i].handler_idx == instruction->handler_idx) -+ if (atomic_ops[i].opcode == instruction->opcode) - return atomic_ops[i].spirv_op; - } - - return SpvOpMax; - } - --static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) -+static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) - { -- return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; -+ return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; - } - - static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, -@@ -9274,12 +9288,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - bool raw; - SpvOp op; - -- resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; -+ resource = is_imm_atomic_instruction(instruction->opcode) ? &dst[1] : &dst[0]; - - op = spirv_compiler_map_atomic_instruction(instruction); - if (op == SpvOpMax) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -9360,7 +9374,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - { - WARN("Ignoring 'volatile' attribute.\n"); - spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, -- "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); -+ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); - } - - memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) -@@ -9379,7 +9393,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, - op, type_id, operands, i); - -- if (is_imm_atomic_instruction(instruction->handler_idx)) -+ if (is_imm_atomic_instruction(instruction->opcode)) - spirv_compiler_emit_store_dst(compiler, dst, result_id); - } - -@@ -9684,13 +9698,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, - - src_ids[src_count++] = register_info.id; - -- if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) -+ if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) - { - op = GLSLstd450InterpolateAtCentroid; - } - else - { -- assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); -+ assert(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); - op = GLSLstd450InterpolateAtSample; - src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); - } -@@ -9772,7 +9786,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int stream_idx; - -- if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) -+ if (instruction->opcode == VKD3DSIH_EMIT_STREAM) - stream_idx = instruction->src[0].reg.idx[0].offset; - else - stream_idx = 0; -@@ -9793,7 +9807,7 @@ static void 
spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int stream_idx; - -- if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) -+ if (instruction->opcode == VKD3DSIH_CUT_STREAM) - stream_idx = instruction->src[0].reg.idx[0].offset; - else - stream_idx = 0; -@@ -9807,9 +9821,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_end_primitive(builder); - } - --static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) -+static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) - { -- switch (handler_idx) -+ switch (opcode) -+ { -+ case VKD3DSIH_QUAD_READ_ACROSS_X: -+ return 0; -+ case VKD3DSIH_QUAD_READ_ACROSS_Y: -+ return 1; -+ case VKD3DSIH_QUAD_READ_ACROSS_D: -+ return 2; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, direction_type_id, direction_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ direction_id = map_quad_read_across_direction(instruction->opcode); -+ direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id, lane_id; -+ -+ if (!register_is_constant_or_undef(&src[1].reg)) -+ { -+ FIXME("Unsupported non-constant quad read lane index.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "Non-constant quad read lane indices are not supported."); -+ return; -+ } -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) -+{ -+ switch (opcode) - { - case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: - return SpvOpGroupNonUniformAllEqual; -@@ -9833,7 +9906,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, - - vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); - -- op = map_wave_bool_op(instruction->handler_idx); -+ op = map_wave_bool_op(instruction->opcode); - type_id = vkd3d_spirv_get_op_type_bool(builder); - 
val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, -@@ -9865,9 +9938,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - --static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) -+static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) - { -- switch (handler_idx) -+ switch (opcode) - { - case VKD3DSIH_WAVE_ACTIVE_BIT_AND: - return SpvOpGroupNonUniformBitwiseAnd; -@@ -9905,7 +9978,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, - uint32_t type_id, val_id; - SpvOp op; - -- op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); -+ op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); - - type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); -@@ -9928,7 +10001,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, - SpvGroupOperation group_op; - uint32_t type_id, val_id; - -- group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan -+ group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan - : SpvGroupOperationReduce; - - val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); -@@ -10014,7 +10087,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - - compiler->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_GLOBAL_FLAGS: - spirv_compiler_emit_dcl_global_flags(compiler, instruction); -@@ -10337,6 +10410,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_QUAD_READ_ACROSS_D: -+ case VKD3DSIH_QUAD_READ_ACROSS_X: -+ case VKD3DSIH_QUAD_READ_ACROSS_Y: -+ spirv_compiler_emit_quad_read_across(compiler, instruction); -+ break; -+ case VKD3DSIH_QUAD_READ_LANE_AT: -+ spirv_compiler_emit_quad_read_lane_at(compiler, instruction); -+ break; - case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: - case VKD3DSIH_WAVE_ALL_TRUE: - case VKD3DSIH_WAVE_ANY_TRUE: -@@ -10390,9 +10471,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - /* nothing to do */ - break; - default: -- FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); -+ FIXME("Unhandled instruction %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, -- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); -+ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); - break; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index b562e815a81..24206ae9a4d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -780,7 +780,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - return; - } - -@@ -789,7 +789,7 @@ 
static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - -@@ -797,7 +797,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - icb->register_idx = 0; -@@ -2395,16 +2395,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) - { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); -- if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE -- || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -- sm4->phase = ins->handler_idx; -- sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+ if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE -+ || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) -+ sm4->phase = ins->opcode; -+ sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; - ins->flags = 0; - ins->coissue = false; - ins->raw = false; -@@ -2417,7 +2417,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -@@ -2459,7 +2459,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) -@@ -2467,7 +2467,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; -@@ -2478,7 +2478,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - } -@@ -2488,7 +2488,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - - fail: - *ptr = sm4->end; -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - -@@ -2693,7 +2693,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(&sm4, ins); 
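/*
 * Illustrative sketch, not part of this patch: the hunks in this series rename
 * the "handler_idx" member of struct vkd3d_shader_instruction to "opcode", and
 * consumers (spirv.c, vkd3d_shader_main.c, and the tpf.c reader above) now
 * test or switch on ins->opcode directly. The helper below shows that dispatch
 * pattern on a parsed vsir_program; the function name
 * count_invalid_instructions is hypothetical and exists only for illustration.
 */
static unsigned int count_invalid_instructions(const struct vsir_program *program)
{
    unsigned int count = 0;
    size_t i;

    for (i = 0; i < program->instructions.count; ++i)
    {
        const struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];

        /* Frontends such as shader_sm4_read_instruction() mark instructions
         * they could not parse as VKD3DSIH_INVALID. */
        switch (ins->opcode)
        {
            case VKD3DSIH_INVALID:
                ++count;
                break;

            default:
                break;
        }
    }

    return count;
}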
- -- if (ins->handler_idx == VKD3DSIH_INVALID) -+ if (ins->opcode == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - vsir_program_cleanup(program); -@@ -3178,13 +3178,14 @@ struct extern_resource - /* var is only not NULL if this resource is a whole variable, so it may be responsible for more - * than one component. */ - const struct hlsl_ir_var *var; -+ const struct hlsl_buffer *buffer; - - char *name; - struct hlsl_type *data_type; - bool is_user_packed; - - enum hlsl_regset regset; -- unsigned int id, bind_count; -+ unsigned int id, space, index, bind_count; - }; - - static int sm4_compare_extern_resources(const void *a, const void *b) -@@ -3196,7 +3197,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) - if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) - return r; - -- return vkd3d_u32_compare(aa->id, bb->id); -+ if ((r = vkd3d_u32_compare(aa->space, bb->space))) -+ return r; -+ -+ return vkd3d_u32_compare(aa->index, bb->index); - } - - static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -@@ -3220,6 +3224,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; - const struct hlsl_ir_var *var; -+ struct hlsl_buffer *buffer; - enum hlsl_regset regset; - size_t capacity = 0; - char *name; -@@ -3272,13 +3277,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - hlsl_release_string_buffer(ctx, name_buffer); - - extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; - extern_resources[*count].data_type = component_type; - extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; - - extern_resources[*count].regset = regset; -- extern_resources[*count].id = var->regs[regset].id + regset_offset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].space = var->regs[regset].space; -+ extern_resources[*count].index = var->regs[regset].index + regset_offset; - extern_resources[*count].bind_count = 1; - - ++*count; -@@ -3313,13 +3321,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - } - - extern_resources[*count].var = var; -+ extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; - extern_resources[*count].data_type = var->data_type; -- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ /* For some reason 5.1 resources aren't marked as -+ * user-packed, but cbuffers still are. 
*/ -+ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) -+ && !!var->reg_reservation.reg_type; - - extern_resources[*count].regset = r; - extern_resources[*count].id = var->regs[r].id; -+ extern_resources[*count].space = var->regs[r].space; -+ extern_resources[*count].index = var->regs[r].index; - extern_resources[*count].bind_count = var->bind_count[r]; - - ++*count; -@@ -3327,14 +3341,51 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - } - } - -+ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!buffer->reg.allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, buffer->name))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = buffer; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = NULL; -+ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; -+ -+ extern_resources[*count].regset = HLSL_REGSET_NUMERIC; -+ extern_resources[*count].id = buffer->reg.id; -+ extern_resources[*count].space = buffer->reg.space; -+ extern_resources[*count].index = buffer->reg.index; -+ extern_resources[*count].bind_count = 1; -+ -+ ++*count; -+ } -+ - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; - } - - static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { -- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; -+ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -+ unsigned int cbuffer_count = 0, extern_resources_count, i, j; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - struct vkd3d_bytecode_buffer buffer = {0}; -@@ -3354,19 +3405,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - -- resource_count += extern_resources_count; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- { - ++cbuffer_count; -- ++resource_count; -- } - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); -- put_u32(&buffer, resource_count); -+ put_u32(&buffer, extern_resources_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); -@@ -3378,7 +3425,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ -+ put_u32(&buffer, binding_desc_size); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -@@ -3395,21 +3442,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); -- - if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, sm4_resource_type(resource->data_type)); -- if (resource->regset == HLSL_REGSET_SAMPLERS) -- { -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- } -+ if (resource->buffer) -+ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - else -+ put_u32(&buffer, sm4_resource_type(resource->data_type)); -+ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) - { - unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; - -@@ -3418,32 +3459,21 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } -- put_u32(&buffer, resource->id); -+ else -+ { -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ } -+ put_u32(&buffer, resource->index); - put_u32(&buffer, resource->bind_count); - put_u32(&buffer, flags); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- uint32_t flags = 0; -- -- if (!cbuffer->reg.allocated) -- continue; - - if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); -- -- if (cbuffer->reservation.reg_type) -- flags |= D3D_SIF_USERPACKED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -- put_u32(&buffer, 0); /* return type */ -- put_u32(&buffer, 0); /* dimension */ -- put_u32(&buffer, 0); /* multisample count */ -- put_u32(&buffer, cbuffer->reg.id); /* bind point */ -- put_u32(&buffer, 1); /* bind count */ -- put_u32(&buffer, flags); /* flags */ -+ { -+ put_u32(&buffer, resource->space); -+ put_u32(&buffer, resource->id); -+ } - } - - for (i = 0; i < extern_resources_count; ++i) -@@ -3451,16 +3481,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - - string_offset = put_string(&buffer, resource->name); -- set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!cbuffer->reg.allocated) -- continue; -- -- string_offset = put_string(&buffer, cbuffer->name); -- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); -+ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); - } - - /* Buffers. 
*/ -@@ -3522,7 +3543,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ -- put_u32(&buffer, 0); /* FIXME: default value */ -+ put_u32(&buffer, 0); /* default value */ - - if (profile->major_version >= 5) - { -@@ -3546,6 +3567,34 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); -+ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset; -+ enum hlsl_regset regset; -+ -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ -+ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), -+ var->default_values[k].value.u); -+ } -+ } -+ } - ++j; - } - } -@@ -3720,30 +3769,57 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } - assert(regset == HLSL_REGSET_TEXTURES); -- reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } - assert(regset == HLSL_REGSET_UAVS); -- reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; -- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[1].offset = 
var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } - assert(regset == HLSL_REGSET_SAMPLERS); -- reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else -@@ -3753,9 +3829,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - assert(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->buffer->reg.id; -- reg->idx[1].offset = offset / 4; -- reg->idx_count = 2; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -+ reg->idx[2].offset = offset / 4; -+ reg->idx_count = 3; -+ } -+ else -+ { -+ reg->idx[0].offset = var->buffer->reg.index; -+ reg->idx[1].offset = offset / 4; -+ reg->idx_count = 2; -+ } - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } - } -@@ -4139,18 +4225,36 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - - static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) - { -- const struct sm4_instruction instr = -+ size_t size = (cbuffer->used_size + 3) / 4; -+ -+ struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -- .srcs[0].reg.idx[0].offset = cbuffer->reg.id, -- .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, -- .srcs[0].reg.idx_count = 2, - .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, - .src_count = 1, - }; -+ -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; -+ instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ -+ instr.srcs[0].reg.idx_count = 3; -+ -+ instr.idx[0] = size; -+ instr.idx[1] = cbuffer->reg.space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[1].offset = size; -+ instr.srcs[0].reg.idx_count = 2; -+ } -+ - write_sm4_instruction(tpf, &instr); - } - -@@ -4163,7 +4267,6 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3DSPR_SAMPLER, -- .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - -@@ -4179,7 +4282,22 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - -- instr.dsts[0].reg.idx[0].offset = resource->id + i; -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ assert(!i); -+ instr.dsts[0].reg.idx[0].offset = resource->id; -+ instr.dsts[0].reg.idx[1].offset = resource->index; -+ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[0] = resource->space; -+ instr.idx_count = 1; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = resource->index + i; -+ instr.dsts[0].reg.idx_count = 1; -+ } - write_sm4_instruction(tpf, &instr); - } - } -@@ -4212,6 +4330,23 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - .idx_count = 1, - }; - -+ if (hlsl_version_ge(tpf->ctx, 
5, 1)) -+ { -+ assert(!i); -+ instr.dsts[0].reg.idx[0].offset = resource->id; -+ instr.dsts[0].reg.idx[1].offset = resource->index; -+ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[1] = resource->space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = resource->index + i; -+ instr.dsts[0].reg.idx_count = 1; -+ } -+ - if (uav) - { - switch (resource->data_type->sampler_dim) -@@ -5799,21 +5934,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- { -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); -- - write_sm4_dcl_constant_buffer(&tpf, cbuffer); -- } - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); -- - if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -@@ -5875,7 +6002,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - for (unsigned int i = 0; i < extern_resources_count; ++i) - { -- if (extern_resources[i].data_type->e.resource.rasteriser_ordered) -+ if (extern_resources[i].data_type && extern_resources[i].data_type->e.resource.rasteriser_ordered) - *flags |= VKD3D_SM4_REQUIRES_ROVS; - } - sm4_free_extern_resources(extern_resources, extern_resources_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 14a3fa778e5..fdbde019111 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 +23,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex - - static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) -- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) -- || handler_idx == VKD3DSIH_LD_UAV_TYPED -- || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) -- || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) -+ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) -+ || opcode == VKD3DSIH_LD_UAV_TYPED -+ || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) -+ || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); - } - - static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, -@@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * - - static bool vkd3d_shader_instruction_is_uav_counter(const 
struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC -- || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; - } - - static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, -@@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex - - static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) -- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) -+ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); - } - - static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, -@@ -1130,7 +1134,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - - context->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_CONSTANT_BUFFER: - vkd3d_shader_scan_constant_buffer_declaration(context, instruction); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 29b8d6ad022..37e3d31c995 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -455,6 +455,10 @@ enum vkd3d_shader_opcode - VKD3DSIH_PHASE, - VKD3DSIH_PHI, - VKD3DSIH_POW, -+ VKD3DSIH_QUAD_READ_ACROSS_D, -+ VKD3DSIH_QUAD_READ_ACROSS_X, -+ VKD3DSIH_QUAD_READ_ACROSS_Y, -+ VKD3DSIH_QUAD_READ_LANE_AT, - VKD3DSIH_RCP, - VKD3DSIH_REP, - VKD3DSIH_RESINFO, -@@ -805,6 +809,7 @@ enum vkd3d_tessellator_domain - - #define VKD3DSI_NONE 0x0 - #define VKD3DSI_TEXLD_PROJECT 0x1 -+#define VKD3DSI_TEXLD_BIAS 0x2 - #define VKD3DSI_INDEXED_DYNAMIC 0x4 - #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 - #define VKD3DSI_RESINFO_UINT 0x2 -@@ -1189,7 +1194,7 @@ struct vkd3d_shader_location - struct vkd3d_shader_instruction - { - struct vkd3d_shader_location location; -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - uint32_t flags; - unsigned int dst_count; - unsigned int src_count; -@@ -1238,8 +1243,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns - return v->major < major || (v->major == major && v->minor <= minor); - } - --void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx); -+void vsir_instruction_init(struct vkd3d_shader_instruction *ins, -+ const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); - - static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) - { -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 95366d3441b..6c463be8d60 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2025,7 +2025,8 @@ static void 
d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l - - static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, - const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, -- VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) -+ VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout, -+ struct d3d12_device *device) - { - bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); - VkPipelineStageFlags queue_shader_stages = 0; -@@ -2035,8 +2036,9 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT - | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT - | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT -- | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT - | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; -+ if (device->vk_info.geometry_shaders) -+ queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; - } - if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) - queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; -@@ -2054,7 +2056,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - { - if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) - return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, -- resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); -+ resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); - - *access_mask = VK_ACCESS_MEMORY_READ_BIT; - *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -@@ -2251,7 +2253,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 - VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; - - if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, -- resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) -+ resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, -+ &dst_stage_mask, &barrier.newLayout, list->device)) - { - FIXME("Unhandled state %#x.\n", resource->initial_state); - return; -@@ -4277,13 +4280,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - } - - if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, -- resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) -+ resource, list->vk_queue_flags, vk_info, &src_access_mask, -+ &src_stage_mask, &layout_before, list->device)) - { - FIXME("Unhandled state %#x.\n", state_before); - continue; - } - if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, -- resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) -+ resource, list->vk_queue_flags, vk_info, &dst_access_mask, -+ &dst_stage_mask, &layout_after, list->device)) - { - FIXME("Unhandled state %#x.\n", state_after); - continue; -@@ -4303,7 +4308,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - - resource = unsafe_impl_from_ID3D12Resource(uav->pResource); - vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, -- resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); -+ resource, list->vk_queue_flags, 
vk_info, &access_mask, -+ &stage_mask, &image_layout, list->device); - src_access_mask = dst_access_mask = access_mask; - src_stage_mask = dst_stage_mask = stage_mask; - layout_before = layout_after = image_layout; -@@ -4814,15 +4820,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - return; - } - -- if (!views) -- { -- WARN("NULL \"views\" pointer specified.\n"); -- return; -- } -- - for (i = 0; i < view_count; ++i) - { -- if (views[i].BufferLocation) -+ if (views && views[i].BufferLocation) - { - resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); - buffers[i] = resource->u.vk_buffer; -@@ -5434,6 +5434,52 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - } - } - -+static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, -+ struct d3d12_resource *resource, VkClearColorValue *colour) -+{ -+ struct vkd3d_texture_view_desc view_desc; -+ const struct vkd3d_format *uint_format; -+ struct vkd3d_view *uint_view; -+ -+ if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) -+ && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) -+ { -+ ERR("Unhandled format %#x.\n", view->format->dxgi_format); -+ return NULL; -+ } -+ -+ if (d3d12_resource_is_buffer(resource)) -+ { -+ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, -+ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) -+ { -+ ERR("Failed to create buffer view.\n"); -+ return NULL; -+ } -+ -+ return uint_view; -+ } -+ -+ memset(&view_desc, 0, sizeof(view_desc)); -+ view_desc.view_type = view->info.texture.vk_view_type; -+ view_desc.format = uint_format; -+ view_desc.miplevel_idx = view->info.texture.miplevel_idx; -+ view_desc.miplevel_count = 1; -+ view_desc.layer_idx = view->info.texture.layer_idx; -+ view_desc.layer_count = view->info.texture.layer_count; -+ view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; -+ view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; -+ -+ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, -+ resource->u.vk_image, &view_desc, &uint_view)) -+ { -+ ERR("Failed to create image view.\n"); -+ return NULL; -+ } -+ -+ return uint_view; -+} -+ - static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) -@@ -5441,8 +5487,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; -- struct vkd3d_texture_view_desc view_desc; -- const struct vkd3d_format *uint_format; - const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; -@@ -5456,44 +5500,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - view = &descriptor->v; - memcpy(colour.uint32, values, sizeof(colour.uint32)); - -- if (view->format->type != VKD3D_FORMAT_TYPE_UINT) -+ if (view->format->type != VKD3D_FORMAT_TYPE_UINT -+ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) - { -- if (!(uint_format = 
vkd3d_find_uint_format(device, view->format->dxgi_format)) -- && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) -- { -- ERR("Unhandled format %#x.\n", view->format->dxgi_format); -- return; -- } -- -- if (d3d12_resource_is_buffer(resource_impl)) -- { -- if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, -- uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) -- { -- ERR("Failed to create buffer view.\n"); -- return; -- } -- } -- else -- { -- memset(&view_desc, 0, sizeof(view_desc)); -- view_desc.view_type = view->info.texture.vk_view_type; -- view_desc.format = uint_format; -- view_desc.miplevel_idx = view->info.texture.miplevel_idx; -- view_desc.miplevel_count = 1; -- view_desc.layer_idx = view->info.texture.layer_idx; -- view_desc.layer_count = view->info.texture.layer_count; -- view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; -- view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; -- -- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, -- &uint_view)) -- { -- ERR("Failed to create image view.\n"); -- return; -- } -- } -- descriptor = uint_view; -+ ERR("Failed to create UINT view.\n"); -+ return; - } - - d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); -@@ -5507,19 +5518,32 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct vkd3d_view *descriptor, *uint_view = NULL; -+ struct d3d12_device *device = list->device; -+ const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; -- struct vkd3d_view *view; - - TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", - iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); - - resource_impl = unsafe_impl_from_ID3D12Resource(resource); -- if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) -+ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) - return; -+ view = &descriptor->v; - memcpy(colour.float32, values, sizeof(colour.float32)); - -- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); -+ if (view->format->type == VKD3D_FORMAT_TYPE_SINT -+ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) -+ { -+ ERR("Failed to create UINT view.\n"); -+ return; -+ } -+ -+ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); -+ -+ if (uint_view) -+ vkd3d_view_decref(uint_view, device); - } - - static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index cfc9c5f5ed3..674e46fe5c5 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = - VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - }; - -+/* In general we don't want to enable Vulkan beta extensions, but make an -+ * exception for VK_KHR_portability_subset because we draw no real feature from -+ * it, but it's still useful to be able to develop for MoltenVK without being -+ * spammed with validation errors. 
*/ -+#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME -+#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" -+#endif -+ - static const struct vkd3d_optional_extension_info optional_device_extensions[] = - { - /* KHR extensions */ -@@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), - VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), - VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), -+ VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), - VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), - VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), - VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), -@@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), - VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), - VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), -- VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), -+ VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), - VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), - VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), - VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), -@@ -1634,6 +1643,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; - vulkan_info->sparse_binding = features->sparseBinding; - vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; -@@ -3806,7 +3816,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - return E_INVALIDARG; - } - -- data->UnalignedBlockTexturesSupported = FALSE; -+ /* Vulkan does not restrict block texture alignment. 
*/ -+ data->UnalignedBlockTexturesSupported = TRUE; - - TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index c897d9f2c5a..db2d87428bb 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d - return false; - } - -- if (align(desc->Width, format->block_width) != desc->Width -- || align(desc->Height, format->block_height) != desc->Height) -- { -- WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", -- desc->Width, desc->Height, desc->Format); -- return false; -- } -- - return true; - } - -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index d1fa866d9e3..a55e967cdfc 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -123,6 +123,7 @@ struct vkd3d_vulkan_info - bool KHR_image_format_list; - bool KHR_maintenance2; - bool KHR_maintenance3; -+ bool KHR_portability_subset; - bool KHR_push_descriptor; - bool KHR_sampler_mirror_clamp_to_edge; - bool KHR_timeline_semaphore; -@@ -145,6 +146,7 @@ struct vkd3d_vulkan_info - - bool rasterization_stream; - bool transform_feedback_queries; -+ bool geometry_shaders; - - bool uav_read_without_format; - --- -2.43.0 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch new file mode 100644 index 00000000..95d0ad2f --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch @@ -0,0 +1,1053 @@ +From e997993e7188ca80cee888a2593f36c423057b18 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Mon, 15 Jul 2024 10:03:30 +1000 +Subject: [PATCH] Updated vkd3d to 5a53b739959db74e8dcce023a7d49356b9008e92. + +--- + libs/vkd3d/include/vkd3d_shader.h | 217 ++++++++++++++ + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 11 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 12 + + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 280 +++++++++++++++++- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 99 ++++++- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 46 ++- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 8 +- + 9 files changed, 649 insertions(+), 28 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index d3afcc11b16..4acb622468a 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -105,6 +105,11 @@ enum vkd3d_shader_structure_type + * \since 1.10 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO, ++ /** ++ * The structure is a vkd3d_shader_parameter_info structure. ++ * \since 1.13 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -453,44 +458,167 @@ enum vkd3d_shader_binding_flag + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), + }; + ++/** ++ * The manner in which a parameter value is provided to the shader, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + enum vkd3d_shader_parameter_type + { + VKD3D_SHADER_PARAMETER_TYPE_UNKNOWN, ++ /** The parameter value is embedded directly in the shader. 
*/
+     VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT,
++    /**
++     * The parameter value is provided to the shader via a specialization
++     * constant. This value is only supported for the SPIR-V target type.
++     */
+     VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT,
++    /**
++     * The parameter value is provided to the shader as part of a uniform
++     * buffer.
++     *
++     * \since 1.13
++     */
++    VKD3D_SHADER_PARAMETER_TYPE_BUFFER,
+
+     VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_TYPE),
+ };
+
++/**
++ * The format of data provided to the shader, used in
++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1.
++ */
+ enum vkd3d_shader_parameter_data_type
+ {
+     VKD3D_SHADER_PARAMETER_DATA_TYPE_UNKNOWN,
++    /** The parameter is provided as a 32-bit unsigned integer. */
+     VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32,
++    /** The parameter is provided as a 32-bit float. \since 1.13 */
++    VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32,
+
+     VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE),
+ };
+
++/**
++ * Names a specific shader parameter, used in
++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1.
++ */
+ enum vkd3d_shader_parameter_name
+ {
+     VKD3D_SHADER_PARAMETER_NAME_UNKNOWN,
++    /**
++     * The sample count of the framebuffer, as returned by the HLSL function
++     * GetRenderTargetSampleCount() or the GLSL builtin gl_NumSamples.
++     *
++     * This parameter should be specified when compiling to SPIR-V, which
++     * provides no builtin ability to query this information from the shader.
++     *
++     * The default value is 1.
++     *
++     * The data type for this parameter must be
++     * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32.
++     */
+     VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT,
++    /**
++     * Alpha test comparison function. When this parameter is provided, if the
++     * alpha component of the pixel shader colour output at location 0 fails the
++     * test, as defined by this function and the reference value provided by
++     * VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, the fragment will be
++     * discarded.
++     *
++     * This parameter, along with VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF,
++     * can be used to implement fixed function alpha test, as present in
++     * Direct3D versions up to 9, if the target environment does not support
++     * alpha test as part of its own fixed-function API (as Vulkan and core
++     * OpenGL).
++     *
++     * The default value is VKD3D_SHADER_COMPARISON_FUNC_ALWAYS.
++     *
++     * The data type for this parameter must be
++     * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. The value specified must be
++     * a member of enum vkd3d_shader_comparison_func.
++     *
++     * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this
++     * version of vkd3d-shader.
++     *
++     * \since 1.13
++     */
++    VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC,
++    /**
++     * Alpha test reference value.
++     * See VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC for documentation of
++     * alpha test.
++     *
++     * The default value is zero.
++     *
++     * \since 1.13
++     */
++    VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF,
+
+     VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME),
+ };
+
++/**
++ * The value of an immediate constant parameter, used in
++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1.
++ */
+ struct vkd3d_shader_parameter_immediate_constant
+ {
+     union
+     {
++        /**
++         * The value if the parameter's data type is
++         * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32.
++         */
+         uint32_t u32;
++        /**
++         * The value if the parameter's data type is
++         * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32.
++ * ++ * \since 1.13 ++ */ ++ float f32; + } u; + }; + ++/** ++ * The linkage of a specialization constant parameter, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + struct vkd3d_shader_parameter_specialization_constant + { ++ /** The ID of the specialization constant. */ + uint32_t id; + }; + ++/** ++ * The linkage of a parameter specified through a uniform buffer, used in ++ * struct vkd3d_shader_parameter1. ++ */ ++struct vkd3d_shader_parameter_buffer ++{ ++ /** ++ * The set of the uniform buffer descriptor. If the target environment does ++ * not support descriptor sets, this value must be set to 0. ++ */ ++ unsigned int set; ++ /** The binding index of the uniform buffer descriptor. */ ++ unsigned int binding; ++ /** The byte offset of the parameter within the buffer. */ ++ uint32_t offset; ++}; ++ ++/** ++ * An individual shader parameter. ++ * ++ * This structure is an earlier version of struct vkd3d_shader_parameter1 ++ * which supports fewer parameter types; ++ * refer to that structure for usage information. ++ * ++ * Only the following types may be used with this structure: ++ * ++ * - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT ++ * - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT ++ */ + struct vkd3d_shader_parameter + { + enum vkd3d_shader_parameter_name name; +@@ -503,6 +631,56 @@ struct vkd3d_shader_parameter + } u; + }; + ++/** ++ * An individual shader parameter. ++ * ++ * This structure is used in struct vkd3d_shader_parameter_info; see there for ++ * explanation of shader parameters. ++ * ++ * For example, to specify the rasterizer sample count to the shader via an ++ * unsigned integer specialization constant with ID 3, ++ * set the following members: ++ * ++ * - \a name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT ++ * - \a type = VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT ++ * - \a data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32 ++ * - \a u.specialization_constant.id = 3 ++ * ++ * This structure is an extended version of struct vkd3d_shader_parameter. ++ */ ++struct vkd3d_shader_parameter1 ++{ ++ /** The builtin parameter to be mapped. */ ++ enum vkd3d_shader_parameter_name name; ++ /** How the parameter will be provided to the shader. */ ++ enum vkd3d_shader_parameter_type type; ++ /** ++ * The data type of the supplied parameter, which determines how it is to ++ * be interpreted. ++ */ ++ enum vkd3d_shader_parameter_data_type data_type; ++ union ++ { ++ /** ++ * Additional information if \a type is ++ * VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT. ++ */ ++ struct vkd3d_shader_parameter_immediate_constant immediate_constant; ++ /** ++ * Additional information if \a type is ++ * VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT. ++ */ ++ struct vkd3d_shader_parameter_specialization_constant specialization_constant; ++ /** ++ * Additional information if \a type is ++ * VKD3D_SHADER_PARAMETER_TYPE_BUFFER. ++ */ ++ struct vkd3d_shader_parameter_buffer buffer; ++ void *_pointer_pad; ++ uint32_t _pad[4]; ++ } u; ++}; ++ + /** + * Symbolic register indices for mapping uniform constant register sets in + * legacy Direct3D bytecode to constant buffer views in the target environment. +@@ -1994,6 +2172,44 @@ struct vkd3d_shader_varying_map_info + unsigned int varying_count; + }; + ++/** ++ * Interface information regarding a builtin shader parameter. 
++ * ++ * Like compile options specified with struct vkd3d_shader_compile_option, ++ * parameters are used to specify certain values which are not part of the ++ * source shader bytecode but which need to be specified in the shader bytecode ++ * in the target format. ++ * Unlike struct vkd3d_shader_compile_option, however, this structure allows ++ * parameters to be specified in a variety of different ways, as described by ++ * enum vkd3d_shader_parameter_type. ++ * ++ * This structure is an extended version of struct vkd3d_shader_parameter as ++ * used in struct vkd3d_shader_spirv_target_info, which allows more parameter ++ * types to be used, and also allows specifying parameters when compiling ++ * shaders to target types other than SPIR-V. If this structure is chained ++ * along with vkd3d_shader_spirv_target_info, any parameters specified in the ++ * latter structure are ignored. ++ * ++ * This structure is passed to vkd3d_shader_compile() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * This structure contains only input parameters. ++ * ++ * \since 1.13 ++ */ ++struct vkd3d_shader_parameter_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** Pointer to an array of dynamic parameters for this shader instance. */ ++ const struct vkd3d_shader_parameter1 *parameters; ++ /** Size, in elements, of \ref parameters. */ ++ unsigned int parameter_count; ++}; ++ + #ifdef LIBVKD3D_SHADER_SOURCE + # define VKD3D_SHADER_API VKD3D_EXPORT + #else +@@ -2077,6 +2293,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported + * - vkd3d_shader_descriptor_offset_info + * - vkd3d_shader_hlsl_source_info + * - vkd3d_shader_interface_info ++ * - vkd3d_shader_parameter_info + * - vkd3d_shader_preprocess_info + * - vkd3d_shader_scan_combined_resource_sampler_info + * - vkd3d_shader_scan_descriptor_info +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 4522d56c5c9..abfbd461b33 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1272,7 +1272,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st + sm1->end = &code[token_count]; + + /* Estimate instruction count to avoid reallocation in most shaders. */ +- if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) ++ if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 2176debc7d2..bf581928a9e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -10206,12 +10206,13 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 + return NULL; + } + +-static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) + { + size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; ++ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + struct shader_signature *patch_constant_signature, *output_signature, *input_signature; +- const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + const uint32_t *byte_code = dxbc_desc->byte_code; + unsigned int chunk_offset, chunk_size; +@@ -10302,9 +10303,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + + /* Estimate instruction count to avoid reallocation in most shaders. */ + count = max(token_count, 400) - 400; +- if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) ++ if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); ++ vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + +@@ -10565,7 +10566,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co + dxbc_desc.byte_code = byte_code; + } + +- ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); ++ ret = sm6_parser_init(&sm6, program, compile_info, message_context, &dxbc_desc); + free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(byte_code); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 7b058a65bc1..56736a65306 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -4065,6 +4065,17 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, rad, loc); + } + ++static bool intrinsic_rcp(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); ++} ++ + static bool intrinsic_reflect(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4760,6 +4771,7 @@ intrinsic_functions[] = + {"normalize", 1, true, intrinsic_normalize}, + {"pow", 2, true, intrinsic_pow}, + {"radians", 1, true, intrinsic_radians}, ++ {"rcp", 1, true, intrinsic_rcp}, + {"reflect", 2, true, 
intrinsic_reflect}, + {"refract", 3, true, intrinsic_refract}, + {"round", 1, true, intrinsic_round}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 7e4f168675e..02884df9d76 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -5691,7 +5691,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (!vsir_program_init(program, &version, 0)) ++ if (!vsir_program_init(program, NULL, &version, 0)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index e5432cb35ce..be9e4219d6a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -19,9 +19,73 @@ + #include "vkd3d_shader_private.h" + #include "vkd3d_types.h" + +-bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) ++static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, ++ unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) ++{ ++ const struct vkd3d_shader_spirv_target_info *spirv_info; ++ struct vkd3d_shader_parameter1 *parameters; ++ ++ *ret_count = 0; ++ *ret_parameters = NULL; ++ ++ if (!(spirv_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO)) || !spirv_info->parameter_count) ++ return VKD3D_OK; ++ ++ if (!(parameters = vkd3d_calloc(spirv_info->parameter_count, sizeof(*parameters)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ for (unsigned int i = 0; i < spirv_info->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter *src = &spirv_info->parameters[i]; ++ struct vkd3d_shader_parameter1 *dst = ¶meters[i]; ++ ++ dst->name = src->name; ++ dst->type = src->type; ++ dst->data_type = src->data_type; ++ ++ if (src->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ dst->u.immediate_constant = src->u.immediate_constant; ++ } ++ else if (src->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) ++ { ++ dst->u.specialization_constant = src->u.specialization_constant; ++ } ++ else ++ { ++ ERR("Invalid parameter type %#x.\n", src->type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ } ++ ++ *ret_count = spirv_info->parameter_count; ++ *ret_parameters = parameters; ++ ++ return VKD3D_OK; ++} ++ ++bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, ++ const struct vkd3d_shader_version *version, unsigned int reserve) + { + memset(program, 0, sizeof(*program)); ++ ++ if (compile_info) ++ { ++ const struct vkd3d_shader_parameter_info *parameter_info; ++ ++ if ((parameter_info = vkd3d_find_struct(compile_info->next, PARAMETER_INFO))) ++ { ++ program->parameter_count = parameter_info->parameter_count; ++ program->parameters = parameter_info->parameters; ++ } ++ else ++ { ++ if (convert_parameter_info(compile_info, &program->parameter_count, &program->parameters) < 0) ++ return false; ++ program->free_parameters = true; ++ } ++ } ++ + program->shader_version = *version; + return shader_instruction_array_init(&program->instructions, reserve); + } +@@ -30,6 +94,8 @@ void vsir_program_cleanup(struct vsir_program *program) + { + size_t i; + ++ if (program->free_parameters) ++ vkd3d_free((void *)program->parameters); + for 
(i = 0; i < program->block_name_count; ++i) + vkd3d_free((void *)program->block_names[i]); + vkd3d_free(program->block_names); +@@ -666,6 +732,12 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne + dst->write_mask = VKD3DSP_WRITEMASK_0; + } + ++static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ src->reg.idx[0].offset = idx; ++} ++ + static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +@@ -678,6 +750,12 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 + src->reg.u.immconst_u32[0] = value; + } + ++static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) ++{ ++ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); ++ src->reg.idx[0].offset = idx; ++} ++ + void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode) + { +@@ -5282,6 +5360,203 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + return VKD3D_OK; + } + ++static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) ++{ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ { ++ if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET ++ && !signature->elements[i].register_index) ++ { ++ *index = i; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, ++ const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ ++ static const struct ++ { ++ enum vkd3d_shader_opcode float_opcode; ++ enum vkd3d_shader_opcode uint_opcode; ++ bool swap; ++ } ++ opcodes[] = ++ { ++ [VKD3D_SHADER_COMPARISON_FUNC_EQUAL] = {VKD3DSIH_EQO, VKD3DSIH_IEQ}, ++ [VKD3D_SHADER_COMPARISON_FUNC_NOT_EQUAL] = {VKD3DSIH_NEO, VKD3DSIH_INE}, ++ [VKD3D_SHADER_COMPARISON_FUNC_GREATER_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE}, ++ [VKD3D_SHADER_COMPARISON_FUNC_LESS] = {VKD3DSIH_LTO, VKD3DSIH_ULT}, ++ [VKD3D_SHADER_COMPARISON_FUNC_LESS_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE, true}, ++ [VKD3D_SHADER_COMPARISON_FUNC_GREATER] = {VKD3DSIH_LTO, VKD3DSIH_ULT, true}, ++ }; ++ ++ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) ++ { ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; ++ src_param_init_const_uint(&ins->src[0], 0); ++ ++ *ret_pos = pos + 1; ++ return VKD3D_OK; ++ } ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ switch (ref->data_type) ++ { ++ case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: ++ vsir_instruction_init_with_params(program, ins, &ret->location, 
opcodes[compare_func].float_opcode, 1, 2); ++ src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); ++ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], ++ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); ++ break; ++ ++ case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: ++ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); ++ src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); ++ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], ++ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); ++ break; ++ ++ default: ++ FIXME("Unhandled parameter data type %#x.\n", ref->data_type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ dst_param_init_ssa_bool(&ins->dst[0], program->ssa_count); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ ++ ++ins; ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; ++ src_param_init_ssa_bool(&ins->src[0], program->ssa_count); ++ ++ ++program->ssa_count; ++ ++ ++ins; ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = colour_signature_idx; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].write_mask = program->output_signature.elements[colour_signature_idx].mask; ++ src_param_init_temp_float(&ins->src[0], colour_temp); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ *ret_pos = pos + 3; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; ++ static const struct vkd3d_shader_location no_loc; ++ enum vkd3d_shader_comparison_func compare_func; ++ uint32_t colour_signature_idx, colour_temp; ++ struct vkd3d_shader_instruction *ins; ++ size_t new_pos; ++ int ret; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) ++ || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) ++ return VKD3D_OK; ++ ++ for (unsigned int i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; ++ ++ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC) ++ func = parameter; ++ else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF) ++ ref = parameter; ++ } ++ ++ if (!func || !ref) ++ return VKD3D_OK; ++ ++ if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported alpha test function parameter type %#x.\n", func->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid alpha test function parameter data type %#x.\n", 
func->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ compare_func = func->u.immediate_constant.u.u32; ++ ++ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_ALWAYS) ++ return VKD3D_OK; ++ ++ /* We're going to be reading from the output, so we need to go ++ * through the whole shader and convert it to a temp. */ ++ ++ if (compare_func != VKD3D_SHADER_COMPARISON_FUNC_NEVER) ++ colour_temp = program->temp_count++; ++ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, ++ ref, colour_signature_idx, colour_temp, &new_pos)) < 0) ++ return ret; ++ i = new_pos; ++ continue; ++ } ++ ++ /* No need to convert it if the comparison func is NEVER; we don't ++ * read from the output in that case. */ ++ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) ++ continue; ++ ++ for (size_t j = 0; j < ins->dst_count; ++j) ++ { ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; ++ ++ /* Note we run after I/O normalization. */ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) ++ { ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = colour_temp; ++ } ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ + struct validation_context + { + struct vkd3d_shader_message_context *message_context; +@@ -6274,6 +6549,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t + return result; + } + ++ if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) ++ return result; ++ + if (TRACE_ON()) + vkd3d_shader_trace(program); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 524fb8e9b1f..72a6f1e60dc 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -2418,6 +2418,13 @@ struct spirv_compiler + uint32_t *descriptor_offset_ids; + struct vkd3d_push_constant_buffer_binding *push_constants; + const struct vkd3d_shader_spirv_target_info *spirv_target_info; ++ const struct vkd3d_shader_parameter1 *parameters; ++ unsigned int parameter_count; ++ ++ struct ++ { ++ uint32_t buffer_id; ++ } *spirv_parameter_info; + + bool prolog_emitted; + struct shader_signature input_signature; +@@ -3290,16 +3297,15 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil + return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); + } + +-static const struct vkd3d_shader_parameter *spirv_compiler_get_shader_parameter( ++static const struct vkd3d_shader_parameter1 *spirv_compiler_get_shader_parameter( + struct spirv_compiler *compiler, enum vkd3d_shader_parameter_name name) + { +- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + unsigned int i; + +- for (i = 0; info && i < info->parameter_count; ++i) ++ for (i = 0; i < compiler->parameter_count; ++i) + { +- if (info->parameters[i].name == name) +- return &info->parameters[i]; ++ if (compiler->parameters[i].name == name) ++ return &compiler->parameters[i]; + } + + return NULL; +@@ -3314,6 +3320,7 @@ static const struct vkd3d_spec_constant_info + vkd3d_shader_parameters[] = + { + {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, ++ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, + }; + + static const struct vkd3d_spec_constant_info 
*get_spec_constant_info(enum vkd3d_shader_parameter_name name) +@@ -3352,7 +3359,7 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com + } + + static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, +- enum vkd3d_shader_parameter_name name, uint32_t spec_id) ++ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_spec_constant_info *info; +@@ -3361,7 +3368,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile + info = get_spec_constant_info(name); + default_value = info ? info->default_value : 0; + +- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ++ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); + +@@ -3380,7 +3387,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile + } + + static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, +- enum vkd3d_shader_parameter_name name, uint32_t spec_id) ++ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) + { + unsigned int i; + +@@ -3390,13 +3397,29 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler + return compiler->spec_constants[i].id; + } + +- return spirv_compiler_emit_spec_constant(compiler, name, spec_id); ++ return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); ++} ++ ++static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ unsigned int index = parameter - compiler->parameters; ++ uint32_t type_id, ptr_id, ptr_type_id; ++ ++ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); ++ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); ++ ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, ++ compiler->spirv_parameter_info[index].buffer_id, ++ spirv_compiler_get_constant_uint(compiler, 0)); ++ return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); + } + +-static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler *compiler, ++static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, + enum vkd3d_shader_parameter_name name) + { +- const struct vkd3d_shader_parameter *parameter; ++ const struct vkd3d_shader_parameter1 *parameter; ++ enum vkd3d_data_type type = VKD3D_DATA_UINT; + + if (!(parameter = spirv_compiler_get_shader_parameter(compiler, name))) + { +@@ -3405,15 +3428,28 @@ static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler + } + + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) +- return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); ++ { ++ if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) ++ return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); ++ else ++ return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); ++ } ++ ++ if (parameter->data_type == 
VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) ++ type = VKD3D_DATA_FLOAT; ++ else ++ type = VKD3D_DATA_UINT; ++ + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) +- return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id); ++ return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); ++ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) ++ return spirv_compiler_get_buffer_parameter(compiler, parameter, type); + + FIXME("Unhandled parameter type %#x.\n", parameter->type); + + default_parameter: + return spirv_compiler_get_spec_constant(compiler, +- name, spirv_compiler_alloc_spec_constant_id(compiler)); ++ name, spirv_compiler_alloc_spec_constant_id(compiler), type); + } + + static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, +@@ -4188,6 +4224,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_UNDEF) + return spirv_compiler_emit_load_undef(compiler, reg, write_mask); ++ else if (reg->type == VKD3DSPR_PARAMETER) ++ return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset); + + component_count = vsir_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); +@@ -8129,6 +8167,8 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, + if (src->reg.data_type != VKD3D_DATA_BOOL) + condition_id = spirv_compiler_emit_int_to_bool(compiler, + instruction->flags, src->reg.data_type, 1, condition_id); ++ else if (instruction->flags & VKD3D_SHADER_CONDITIONAL_OP_Z) ++ condition_id = vkd3d_spirv_build_op_logical_not(builder, vkd3d_spirv_get_op_type_bool(builder), condition_id); + void_id = vkd3d_spirv_get_op_type_void(builder); + vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), + &condition_id, 1); +@@ -9525,7 +9565,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co + + if (src->reg.type == VKD3DSPR_RASTERIZER) + { +- val_id = spirv_compiler_emit_uint_shader_parameter(compiler, ++ val_id = spirv_compiler_emit_shader_parameter(compiler, + VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); + } + else +@@ -10570,6 +10610,35 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + + spirv_compiler_emit_descriptor_declarations(compiler); + ++ compiler->parameter_count = program->parameter_count; ++ compiler->parameters = program->parameters; ++ compiler->spirv_parameter_info = vkd3d_calloc(compiler->parameter_count, sizeof(*compiler->spirv_parameter_info)); ++ for (i = 0; i < compiler->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &compiler->parameters[i]; ++ ++ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) ++ { ++ uint32_t type_id, struct_id, ptr_type_id, var_id; ++ ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ++ ++ struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); ++ vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); ++ vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, ++ SpvDecorationOffset, parameter->u.buffer.offset); ++ ++ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, struct_id); ++ var_id = vkd3d_spirv_build_op_variable(builder, 
&builder->global_stream, ++ ptr_type_id, SpvStorageClassUniform, 0); ++ ++ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationDescriptorSet, parameter->u.buffer.set); ++ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationBinding, parameter->u.buffer.binding); ++ ++ compiler->spirv_parameter_info[i].buffer_id = var_id; ++ } ++ } ++ + if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index a7c37215e5e..3a9a402e8e2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -2493,7 +2493,7 @@ fail: + } + + static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, +- const uint32_t *byte_code, size_t byte_code_size, const char *source_name, ++ const uint32_t *byte_code, size_t byte_code_size, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_version version; +@@ -2552,9 +2552,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. */ +- if (!vsir_program_init(program, &version, token_count / 7u + 20)) ++ if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) + return false; +- vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); ++ vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); + sm4->ptr = sm4->start; + + init_sm4_lookup_tables(&sm4->lookup); +@@ -2651,7 +2651,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + } + + if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, +- compile_info->source_name, message_context)) ++ compile_info, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + free_dxbc_shader_desc(&dxbc_desc); +@@ -5189,6 +5189,44 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + } + break; + ++ case HLSL_OP1_RCP: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ /* SM5 comes with a RCP opcode */ ++ if (tpf->ctx->profile->major_version >= 5) ++ { ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); ++ } ++ else ++ { ++ /* For SM4, implement as DIV dst, 1.0, src */ ++ struct sm4_instruction instr; ++ struct hlsl_constant_value one; ++ ++ assert(type_is_float(dst_type)); ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_DIV; ++ ++ sm4_dst_from_node(&instr.dsts[0], &expr->node); ++ instr.dst_count = 1; ++ ++ for (unsigned int i = 0; i < 4; i++) ++ one.u[i].f = 1.0f; ++ sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); ++ sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); ++ instr.src_count = 2; ++ ++ write_sm4_instruction(tpf, &instr); ++ } ++ break; ++ ++ default: ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); ++ } ++ break; ++ + case HLSL_OP1_REINTERPRET: + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 96e613669a6..bf9759ebbbf 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -619,6 +619,7 @@ enum vkd3d_shader_register_type + VKD3DSPR_SSA, + VKD3DSPR_WAVELANECOUNT, + VKD3DSPR_WAVELANEINDEX, ++ VKD3DSPR_PARAMETER, + + VKD3DSPR_COUNT, + +@@ -1362,6 +1363,10 @@ struct vsir_program + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; + ++ unsigned int parameter_count; ++ const struct vkd3d_shader_parameter1 *parameters; ++ bool free_parameters; ++ + unsigned int input_control_point_count, output_control_point_count; + unsigned int flat_constant_count[3]; + unsigned int block_count; +@@ -1377,7 +1382,8 @@ void vsir_program_cleanup(struct vsir_program *program); + int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); +-bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); ++bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, ++ const struct vkd3d_shader_version *version, unsigned int reserve); + enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch deleted file mode 100644 index 5258811b..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch +++ /dev/null @@ -1,206 +0,0 @@ -From f9d3eae7971ce35994e20546183c7147d33c1d4a Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sun, 23 Jun 2024 15:40:43 +1000 -Subject: [PATCH] Updated vkd3d to ccb6150aabc7cce9e26a39366c611f5a7da789e4. - ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 110 ++++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 4 +- - 3 files changed, 108 insertions(+), 8 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 2290385da76..3665b99aed7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -2582,7 +2582,7 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - .srcs[0].type = D3DSPR_TEMP, - .srcs[0].reg = coords->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), - - .srcs[1].type = D3DSPR_SAMPLER, - .srcs[1].reg = reg_id, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index a2c89b15e4a..9c75c87d36e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1878,12 +1878,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned - return true; - } - -+static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) -+{ -+ /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. 
-+ * components are indexed by their sources. i.e. the first component comes from the first -+ * component of the rhs. */ -+ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; -+ -+ /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ -+ for (i = 0; i < 4; ++i) -+ { -+ if (*writemask & (1 << i)) -+ { -+ unsigned int s = (*swizzle >> (i * 8)) & 0xff; -+ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int idx = x + y * 4; -+ new_swizzle |= s << (bit++ * 8); -+ if (new_writemask & (1 << idx)) -+ return false; -+ new_writemask |= 1 << idx; -+ } -+ } -+ width = bit; -+ -+ /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the -+ * incoming vector. */ -+ bit = 0; -+ for (i = 0; i < 16; ++i) -+ { -+ for (j = 0; j < width; ++j) -+ { -+ unsigned int s = (new_swizzle >> (j * 8)) & 0xff; -+ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int idx = x + y * 4; -+ if (idx == i) -+ inverted |= j << (bit++ * 2); -+ } -+ } -+ -+ *swizzle = inverted; -+ *writemask = new_writemask; -+ *ret_width = width; -+ return true; -+} -+ - static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_node *copy; -- unsigned int writemask = 0; -+ unsigned int writemask = 0, width = 0; -+ bool matrix_writemask = false; - - if (assign_op == ASSIGN_OP_SUB) - { -@@ -1901,7 +1946,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - } - - if (hlsl_is_numeric_type(lhs_type)) -+ { - writemask = (1 << lhs_type->dimx) - 1; -+ width = lhs_type->dimx; -+ } - - if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; -@@ -1918,12 +1966,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); - struct hlsl_ir_node *new_swizzle; - uint32_t s = swizzle->swizzle; -- unsigned int width; - -- if (lhs->data_type->class == HLSL_CLASS_MATRIX) -- hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); -+ assert(!matrix_writemask); - -- if (!invert_swizzle(&s, &writemask, &width)) -+ if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) -+ { -+ if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) -+ { -+ hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); -+ return NULL; -+ } -+ if (!invert_swizzle_matrix(&s, &writemask, &width)) -+ { -+ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); -+ return NULL; -+ } -+ matrix_writemask = true; -+ } -+ else if (!invert_swizzle(&s, &writemask, &width)) - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return NULL; -@@ -1971,7 +2031,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - -- if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) -+ if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - -@@ -1987,12 +2047,50 @@ static struct hlsl_ir_node *add_assignment(struct 
hlsl_ctx *ctx, struct hlsl_blo - hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&resource_deref); - } -+ else if (matrix_writemask) -+ { -+ struct hlsl_deref deref; -+ unsigned int i, j, k = 0; -+ -+ hlsl_init_deref_from_index_chain(ctx, &deref, lhs); -+ -+ for (i = 0; i < lhs->data_type->dimy; ++i) -+ { -+ for (j = 0; j < lhs->data_type->dimx; ++j) -+ { -+ struct hlsl_ir_node *load; -+ struct hlsl_block store_block; -+ const unsigned int idx = i * 4 + j; -+ const unsigned int component = i * lhs->data_type->dimx + j; -+ -+ if (!(writemask & (1 << idx))) -+ continue; -+ -+ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ hlsl_block_add_block(block, &store_block); -+ } -+ } -+ -+ hlsl_cleanup_deref(&deref); -+ } - else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) - { - struct hlsl_ir_index *row = hlsl_ir_index(lhs); - struct hlsl_ir_node *mat = row->val.node; - unsigned int i, k = 0; - -+ assert(!matrix_writemask); -+ - for (i = 0; i < mat->data_type->dimx; ++i) - { - struct hlsl_ir_node *cell, *load, *store, *c; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 09066a6191a..36270b159a5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -4188,8 +4188,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - { - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; - -+ /* FIXME: We could potentially pack structs or arrays more efficiently... */ -+ - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); - else - return allocate_range(ctx, allocator, first_write, last_read, reg_size); - } --- -2.43.0 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch deleted file mode 100644 index c05fd1be..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch +++ /dev/null @@ -1,1883 +0,0 @@ -From 13ac90e6b53ef7af6231bec0fc2c3b5200215bf7 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 9 Jul 2024 07:22:05 +1000 -Subject: [PATCH] Updated vkd3d to 3dc43e8945f68c42268b8d5e43525b9e10806f77. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 24 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 454 +++++++++++++++++--- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 52 +++ - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 7 + - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 4 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 306 ++++++++++--- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 41 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 31 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 3 + - libs/vkd3d/libs/vkd3d/command.c | 5 +- - libs/vkd3d/libs/vkd3d/device.c | 1 + - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 1 + - 12 files changed, 793 insertions(+), 136 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 3665b99aed7..2482efc55d2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1497,13 +1497,16 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPC_OBJECT; -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - -@@ -1593,13 +1596,16 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPT_VERTEXSHADER; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - -@@ -1859,7 +1865,7 @@ struct sm1_instruction - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; -- } srcs[3]; -+ } srcs[4]; - unsigned int src_count; - - unsigned int has_dst; -@@ -2567,6 +2573,8 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - { - const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - struct hlsl_ir_node *coords = load->coords.node; -+ struct hlsl_ir_node *ddx = load->ddx.node; -+ struct hlsl_ir_node *ddy = load->ddy.node; - unsigned int sampler_offset, reg_id; - struct sm1_instruction sm1_instr; - -@@ -2607,6 +2615,20 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; - break; - -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ sm1_instr.opcode = D3DSIO_TEXLDD; -+ -+ sm1_instr.srcs[2].type = D3DSPR_TEMP; -+ sm1_instr.srcs[2].reg = ddx->reg.id; -+ sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); -+ -+ sm1_instr.srcs[3].type = D3DSPR_TEMP; -+ sm1_instr.srcs[3].reg = ddy->reg.id; -+ sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); -+ -+ sm1_instr.src_count += 2; -+ break; -+ - default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 3ba0a9ba994..75f10a18253 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -63,6 +63,7 @@ struct fx_write_context_ops - uint32_t (*write_string)(const char *string, struct fx_write_context *fx); - void 
(*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); - void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); -+ void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); - bool are_child_effects_supported; - }; - -@@ -94,6 +95,8 @@ struct fx_write_context - uint32_t texture_count; - uint32_t uav_count; - uint32_t sampler_state_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t rasterizer_state_count; - int status; - - bool child_effect; -@@ -128,8 +131,41 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - fx->ops->write_pass(var, fx); - } - -+static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) -+{ -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_var *v; -+ uint32_t count = 0; -+ -+ if (!scope) -+ return 0; -+ -+ LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (!v->default_values) -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Annotation variable is missing default value."); -+ -+ fx->ops->write_annotation(v, fx); -+ ++count; -+ } -+ -+ return count; -+} -+ -+static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset, count; -+ -+ count_offset = put_u32(buffer, 0); -+ count = write_annotations(scope, fx); -+ set_u32(buffer, count_offset, count); -+} -+ - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); - static const char * get_fx_4_type_name(const struct hlsl_type *type); -+static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); - - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -@@ -279,9 +315,9 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); - put_u32(buffer, 0); /* Assignment count. */ -- put_u32(buffer, 0); /* Annotation count. */ - -- /* TODO: annotations */ -+ write_fx_4_annotations(var->annotations, fx); -+ - /* TODO: assignments */ - } - -@@ -402,6 +438,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - return uav_type_names[type->sampler_dim]; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ return "DepthStencilState"; -+ - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - return "DepthStencilView"; - -@@ -421,10 +460,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -+ struct field_offsets -+ { -+ uint32_t name; -+ uint32_t semantic; -+ uint32_t offset; -+ uint32_t type; -+ }; -+ uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -- uint32_t name_offset, offset, size, stride, numeric_desc; -+ struct field_offsets *field_offsets = NULL; -+ struct hlsl_ctx *ctx = fx->ctx; - uint32_t elements_count = 0; - const char *name; -+ size_t i; - - /* Resolve arrays to element type and number of elements. 
*/ - if (type->class == HLSL_CLASS_ARRAY) -@@ -436,6 +485,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - name = get_fx_4_type_name(type); - - name_offset = write_string(name, fx); -+ if (type->class == HLSL_CLASS_STRUCT) -+ { -+ if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) -+ return 0; -+ -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ -+ field_offsets[i].name = write_string(field->name, fx); -+ field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); -+ field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; -+ field_offsets[i].type = write_type(field->type, fx); -+ } -+ } -+ - offset = put_u32_unaligned(buffer, name_offset); - - switch (type->class) -@@ -446,8 +511,10 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, 1); - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: -@@ -464,6 +531,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_CONSTANT_BUFFER: - vkd3d_unreachable(); - - case HLSL_CLASS_STRING: -@@ -473,34 +541,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - return 0; - } - -- size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ /* Structures can only contain numeric fields, this is validated during variable declaration. */ -+ total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ packed_size = 0; -+ if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) -+ packed_size = hlsl_type_component_count(type) * sizeof(float); - if (elements_count) -- size *= elements_count; -+ { -+ total_size *= elements_count; -+ packed_size *= elements_count; -+ } - stride = align(stride, 4 * sizeof(float)); - - put_u32_unaligned(buffer, elements_count); -- put_u32_unaligned(buffer, size); /* Total size. */ -- put_u32_unaligned(buffer, stride); /* Stride. 
*/ -- put_u32_unaligned(buffer, size); -+ put_u32_unaligned(buffer, total_size); -+ put_u32_unaligned(buffer, stride); -+ put_u32_unaligned(buffer, packed_size); - - if (type->class == HLSL_CLASS_STRUCT) - { -- size_t i; -- - put_u32_unaligned(buffer, type->e.record.field_count); - for (i = 0; i < type->e.record.field_count; ++i) - { -- const struct hlsl_struct_field *field = &type->e.record.fields[i]; -- uint32_t semantic_offset, field_type_offset; -+ const struct field_offsets *field = &field_offsets[i]; - -- name_offset = write_string(field->name, fx); -- semantic_offset = write_string(field->semantic.raw_name, fx); -- field_type_offset = write_type(field->type, fx); -+ put_u32_unaligned(buffer, field->name); -+ put_u32_unaligned(buffer, field->semantic); -+ put_u32_unaligned(buffer, field->offset); -+ put_u32_unaligned(buffer, field->type); -+ } - -- put_u32_unaligned(buffer, name_offset); -- put_u32_unaligned(buffer, semantic_offset); -- put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); -- put_u32_unaligned(buffer, field_type_offset); -+ if (ctx->profile->major_version == 5) -+ { -+ put_u32_unaligned(buffer, 0); /* Base class type */ -+ put_u32_unaligned(buffer, 0); /* Interface count */ - } - } - else if (type->class == HLSL_CLASS_TEXTURE) -@@ -556,6 +630,14 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - put_u32_unaligned(buffer, 6); - } -+ else if (type->class == HLSL_CLASS_RASTERIZER_STATE) -+ { -+ put_u32_unaligned(buffer, 4); -+ } -+ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) -+ { -+ put_u32_unaligned(buffer, 3); -+ } - else if (hlsl_is_numeric_type(type)) - { - numeric_desc = get_fx_4_numeric_type_description(type, fx); -@@ -565,9 +647,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - FIXME("Type %u is not supported.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -- return 0; - } - -+ vkd3d_free(field_offsets); - return offset; - } - -@@ -581,8 +663,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); - count_offset = put_u32(buffer, 0); -- put_u32(buffer, 0); /* Annotation count. */ -+ write_fx_4_annotations(var->annotations, fx); - -+ count = 0; - LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) - { - write_pass(pass, fx); -@@ -617,7 +700,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) - - put_u32(buffer, name_offset); - count_offset = put_u32(buffer, 0); /* Technique count */ -- put_u32(buffer, 0); /* Annotation count */ -+ write_fx_4_annotations(var ? var->annotations : NULL, fx); - - count = fx->technique_count; - write_techniques(var ? var->scope : fx->ctx->globals, fx); -@@ -853,8 +936,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); - return false; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_VOID: - return false; -@@ -862,6 +947,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_CONSTANT_BUFFER: - /* This cannot appear as an extern variable. 
*/ - break; - } -@@ -975,9 +1061,72 @@ static const struct fx_write_context_ops fx_4_ops = - .write_string = write_fx_4_string, - .write_technique = write_fx_4_technique, - .write_pass = write_fx_4_pass, -+ .write_annotation = write_fx_4_annotation, - .are_child_effects_supported = true, - }; - -+static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, -+ struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); -+ uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t offset = buffer->size; -+ unsigned int comp_count; -+ -+ if (!value) -+ return 0; -+ -+ comp_count = hlsl_type_component_count(type); -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ { -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ -+ for (j = 0; j < comp_count; ++j) -+ { -+ put_u32_unaligned(buffer, value->value.u); -+ value++; -+ } -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", -+ type->e.numeric.type); -+ } -+ -+ break; -+ } -+ case HLSL_CLASS_STRUCT: -+ { -+ struct hlsl_struct_field *fields = type->e.record.fields; -+ -+ for (j = 0; j < type->e.record.field_count; ++j) -+ { -+ write_fx_4_default_value(fields[i].type, value, fx); -+ value += hlsl_type_component_count(fields[i].type); -+ } -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); -+ } -+ } -+ -+ return offset; -+} -+ - static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -@@ -987,7 +1136,6 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - { - HAS_EXPLICIT_BIND_POINT = 0x4, - }; -- struct hlsl_ctx *ctx = fx->ctx; - - if (var->has_explicit_bind_point) - flags |= HAS_EXPLICIT_BIND_POINT; -@@ -1001,7 +1149,7 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ - put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. 
*/ -- value_offset = put_u32(buffer, 0); /* Default value offset */ -+ value_offset = put_u32(buffer, 0); - put_u32(buffer, flags); /* Flags */ - - if (shared) -@@ -1010,19 +1158,39 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - } - else - { -- /* FIXME: write default value */ -- set_u32(buffer, value_offset, 0); -- if (var->default_values) -- hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); -+ uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); -+ set_u32(buffer, value_offset, offset); - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); -+ write_fx_4_annotations(var->annotations, fx); - - fx->numeric_variable_count++; - } - } - -+static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t name_offset, type_offset, offset; -+ struct hlsl_ctx *ctx = fx->ctx; -+ -+ name_offset = write_string(var->name, fx); -+ type_offset = write_type(var->data_type, fx); -+ -+ put_u32(buffer, name_offset); -+ put_u32(buffer, type_offset); -+ -+ if (hlsl_is_numeric_type(type)) -+ { -+ offset = write_fx_4_default_value(var->data_type, var->default_values, fx); -+ put_u32(buffer, offset); -+ } -+ else -+ { -+ hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); -+ } -+} -+ - struct rhs_named_value - { - const char *name; -@@ -1164,6 +1332,41 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no - return true; - } - -+static void fold_state_value(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry) -+{ -+ bool progress; -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -+ } while (progress); -+} -+ -+enum state_property_component_type -+{ -+ FX_BOOL, -+ FX_FLOAT, -+ FX_UINT, -+ FX_UINT8, -+}; -+ -+static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case FX_BOOL: -+ return HLSL_TYPE_BOOL; -+ case FX_FLOAT: -+ return HLSL_TYPE_FLOAT; -+ case FX_UINT: -+ case FX_UINT8: -+ return HLSL_TYPE_UINT; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ - static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, - struct fx_write_context *fx) - { -@@ -1213,37 +1416,112 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - { NULL } - }; - -+ static const struct rhs_named_value depth_write_mask_values[] = -+ { -+ { "ZERO", 0 }, -+ { "ALL", 1 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value comparison_values[] = -+ { -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value stencil_op_values[] = -+ { -+ { "KEEP", 1 }, -+ { "ZERO", 2 }, -+ { "REPLACE", 3 }, -+ { "INCR_SAT", 4 }, -+ { "DECR_SAT", 5 }, -+ { "INVERT", 6 }, -+ { "INCR", 7 }, -+ { "DECR", 8 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value fill_values[] = -+ { -+ { "WIREFRAME", 2 }, -+ { "SOLID", 3 }, -+ { NULL } -+ }; -+ -+ static 
const struct rhs_named_value cull_values[] = -+ { -+ { "NONE", 1 }, -+ { "FRONT", 2 }, -+ { "BACK", 3 }, -+ { NULL } -+ }; -+ - static const struct state - { - const char *name; - enum hlsl_type_class container; -- enum hlsl_base_type type; -+ enum hlsl_type_class class; -+ enum state_property_component_type type; - unsigned int dimx; - uint32_t id; - const struct rhs_named_value *values; - } - states[] = - { -- { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, -- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, -- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, -- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, -- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, -- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, -- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, -- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, -- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, -- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, -+ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 12, fill_values }, -+ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 13, cull_values }, -+ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 14 }, -+ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 15 }, -+ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 16 }, -+ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 17 }, -+ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 18 }, -+ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 19 }, -+ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 20 }, -+ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 21 }, -+ -+ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 22 }, -+ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 23, depth_write_mask_values }, -+ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 24, comparison_values }, -+ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 25 }, -+ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 26 }, -+ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 27 }, -+ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 28, stencil_op_values }, -+ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 29, stencil_op_values }, -+ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 30, stencil_op_values }, -+ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 31, comparison_values }, -+ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 32, stencil_op_values }, -+ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 33, stencil_op_values }, -+ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 34, stencil_op_values }, -+ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, 
FX_UINT, 1, 35, comparison_values }, -+ -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 54 }, - /* TODO: "Texture" field */ - }; - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - struct replace_state_context replace_context; -+ struct hlsl_type *state_type = NULL; - struct hlsl_ir_node *node, *cast; - const struct state *state = NULL; - struct hlsl_ctx *ctx = fx->ctx; -- struct hlsl_type *state_type; -+ enum hlsl_base_type base_type; - unsigned int i; -- bool progress; - - for (i = 0; i < ARRAY_SIZE(states); ++i) - { -@@ -1273,28 +1551,54 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - replace_context.values = state->values; - replace_context.var = var; - -- /* Turned named constants to actual constants. */ -+ /* Turn named constants to actual constants. */ - hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); -+ fold_state_value(ctx, entry); - -- if (state->dimx) -- state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); -- else -- state_type = hlsl_get_scalar_type(ctx, state->type); -+ /* Now cast and run folding again. */ - -- /* Cast to expected property type. */ -- node = entry->args->node; -- if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -- return; -- list_add_after(&node->entry, &cast->entry); -- -- hlsl_src_remove(entry->args); -- hlsl_src_from_node(entry->args, cast); -+ base_type = hlsl_type_from_fx_type(state->type); -+ switch (state->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); -+ break; -+ case HLSL_CLASS_SCALAR: -+ state_type = hlsl_get_scalar_type(ctx, base_type); -+ break; -+ case HLSL_CLASS_TEXTURE: -+ hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); -+ break; -+ default: -+ ; -+ } - -- do -+ if (state_type) - { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -- } while (progress); -+ node = entry->args->node; -+ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -+ return; -+ list_add_after(&node->entry, &cast->entry); -+ -+ /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. 
*/ -+ if (state->type == FX_UINT8) -+ { -+ struct hlsl_ir_node *mask; -+ -+ if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) -+ return; -+ list_add_after(&cast->entry, &mask->entry); -+ -+ if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) -+ return; -+ list_add_after(&mask->entry, &cast->entry); -+ } -+ -+ hlsl_src_remove(entry->args); -+ hlsl_src_from_node(entry->args, cast); -+ -+ fold_state_value(ctx, entry); -+ } - } - - static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -1387,19 +1691,27 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - fx->dsv_count += elements_count; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->depth_stencil_state_count += elements_count; -+ break; -+ - case HLSL_CLASS_SAMPLER: - write_fx_4_state_object_initializer(var, fx); - fx->sampler_state_count += elements_count; - break; - -+ case HLSL_CLASS_RASTERIZER_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->rasterizer_state_count += elements_count; -+ break; -+ - default: - hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", - type->e.numeric.type); - } - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); -+ write_fx_4_annotations(var->annotations, fx); - - ++fx->object_variable_count; - } -@@ -1442,9 +1754,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - } - else - { -- put_u32(buffer, 0); /* Annotations count */ -- if (b->annotations) -- hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); -+ write_fx_4_annotations(b->annotations, fx); - ++fx->buffer_count; - } - -@@ -1490,8 +1800,10 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: -@@ -1558,9 +1870,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ - put_u32(&buffer, fx.texture_count); -- put_u32(&buffer, 0); /* Depth stencil state count. */ -+ put_u32(&buffer, fx.depth_stencil_state_count); - put_u32(&buffer, 0); /* Blend state count. */ -- put_u32(&buffer, 0); /* Rasterizer state count. */ -+ put_u32(&buffer, fx.rasterizer_state_count); - put_u32(&buffer, fx.sampler_state_count); - put_u32(&buffer, fx.rtv_count); - put_u32(&buffer, fx.dsv_count); -@@ -1616,9 +1928,9 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ - put_u32(&buffer, fx.texture_count); -- put_u32(&buffer, 0); /* Depth stencil state count. */ -+ put_u32(&buffer, fx.depth_stencil_state_count); - put_u32(&buffer, 0); /* Blend state count. */ -- put_u32(&buffer, 0); /* Rasterizer state count. 
*/ -+ put_u32(&buffer, fx.rasterizer_state_count); - put_u32(&buffer, fx.sampler_state_count); - put_u32(&buffer, fx.rtv_count); - put_u32(&buffer, fx.dsv_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index c69af4e94bb..a157590c97a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -369,15 +369,18 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - type->reg_size[HLSL_REGSET_UAVS] = 1; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - } -@@ -437,11 +440,13 @@ static bool type_is_single_component(const struct hlsl_type *type) - { - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -@@ -452,6 +457,7 @@ static bool type_is_single_component(const struct hlsl_type *type) - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_CONSTANT_BUFFER: - return false; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -530,6 +536,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - } - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ { -+ *type_ptr = type->e.resource.format; -+ return traverse_path_from_component_index(ctx, type_ptr, index_ptr); -+ } -+ - default: - vkd3d_unreachable(); - } -@@ -581,8 +593,10 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - } - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -597,6 +611,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_CONSTANT_BUFFER: - vkd3d_unreachable(); - } - type = next_type; -@@ -870,6 +885,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim - return type; - } - -+struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) -+ return NULL; -+ type->class = HLSL_CLASS_CONSTANT_BUFFER; -+ type->dimy = 1; -+ type->e.resource.format = format; -+ hlsl_type_calculate_reg_size(ctx, type); -+ list_add_tail(&ctx->types, &type->entry); -+ return type; -+} -+ - static const char * get_case_insensitive_typename(const char *name) - { - static const char *const names[] = -@@ -961,8 +990,13 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ return hlsl_type_component_count(type->e.resource.format); -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case 
HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -1043,10 +1077,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - case HLSL_CLASS_TECHNIQUE: - return t1->e.version == t2->e.version; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_VERTEX_SHADER: -@@ -2413,10 +2452,21 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - return string; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ vkd3d_string_buffer_printf(string, "ConstantBuffer"); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } -+ return string; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -3761,9 +3811,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 179cc219e68..3e0d55a7f7d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -78,10 +78,12 @@ enum hlsl_type_class - HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, - HLSL_CLASS_STRUCT, - HLSL_CLASS_ARRAY, -+ HLSL_CLASS_DEPTH_STENCIL_STATE, - HLSL_CLASS_DEPTH_STENCIL_VIEW, - HLSL_CLASS_EFFECT_GROUP, - HLSL_CLASS_PASS, - HLSL_CLASS_PIXEL_SHADER, -+ HLSL_CLASS_RASTERIZER_STATE, - HLSL_CLASS_RENDER_TARGET_VIEW, - HLSL_CLASS_SAMPLER, - HLSL_CLASS_STRING, -@@ -89,6 +91,7 @@ enum hlsl_type_class - HLSL_CLASS_TEXTURE, - HLSL_CLASS_UAV, - HLSL_CLASS_VERTEX_SHADER, -+ HLSL_CLASS_CONSTANT_BUFFER, - HLSL_CLASS_VOID, - }; - -@@ -385,6 +388,7 @@ struct hlsl_attribute - #define HLSL_STORAGE_LINEAR 0x00010000 - #define HLSL_MODIFIER_SINGLE 0x00020000 - #define HLSL_MODIFIER_EXPORT 
0x00040000 -+#define HLSL_STORAGE_ANNOTATION 0x00080000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -838,6 +842,8 @@ struct hlsl_scope - bool loop; - /* The scope was created for the switch statement. */ - bool _switch; -+ /* The scope contains annotation variables. */ -+ bool annotations; - }; - - struct hlsl_profile_info -@@ -1391,6 +1397,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ - unsigned int sample_count); - struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct hlsl_type *format, bool rasteriser_ordered); -+struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); - struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 91418775e1b..55993dac2b4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -198,7 +198,9 @@ while {return KW_WHILE; } - struct hlsl_ctx *ctx = yyget_extra(yyscanner); - - yylval->name = hlsl_strdup(ctx, yytext); -- if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) -+ if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) -+ return KW_CONSTANTBUFFER; -+ else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) - return VAR_IDENTIFIER; - else if (hlsl_get_type(ctx->cur_scope, yytext, true, true)) - return TYPE_IDENTIFIER; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 9c75c87d36e..a02692399f7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -1214,12 +1214,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl - return true; - } - --static bool parse_reservation_index(const char *string, char *type, uint32_t *index) -+static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, -+ struct hlsl_reg_reservation *reservation) - { -- if (!sscanf(string + 1, "%u", index)) -- return false; -+ char *endptr; -+ -+ reservation->reg_type = ascii_tolower(string[0]); -+ -+ /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. */ -+ if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) -+ { -+ bracket_offset = 0; -+ } -+ -+ if (string[1] == '\0') -+ { -+ reservation->reg_index = bracket_offset; -+ return true; -+ } -+ -+ reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; -+ -+ if (*endptr) -+ { -+ /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, -+ * setting index to -1. It will later fail while validating slot limits. */ -+ if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) -+ { -+ reservation->reg_index = -1; -+ return true; -+ } -+ -+ /* All other types tolerate leftover characters. 
*/ -+ if (endptr == string + 1) -+ return false; -+ } - -- *type = ascii_tolower(string[0]); - return true; - } - -@@ -2194,6 +2224,9 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl - if (ctx->profile->major_version < 4) - return index; - -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ return index; -+ - switch (type->class) - { - case HLSL_CLASS_MATRIX: -@@ -2346,6 +2379,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - struct hlsl_semantic new_semantic; - uint32_t modifiers = v->modifiers; - bool unbounded_res_array = false; -+ bool constant_buffer = false; - struct hlsl_ir_var *var; - struct hlsl_type *type; - bool local = true; -@@ -2365,6 +2399,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); - } - -+ if (type->class == HLSL_CLASS_CONSTANT_BUFFER) -+ { -+ type = type->e.resource.format; -+ constant_buffer = true; -+ } -+ - if (unbounded_res_array) - { - if (v->arrays.count == 1) -@@ -2446,7 +2486,16 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - return; - } - -- var->buffer = ctx->cur_buffer; -+ if (constant_buffer && ctx->cur_scope == ctx->globals) -+ { -+ if (!(var_name = vkd3d_strdup(v->name))) -+ return; -+ var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); -+ } -+ else -+ { -+ var->buffer = ctx->cur_buffer; -+ } - - if (var->buffer == ctx->globals_buffer) - { -@@ -2469,8 +2518,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ { - check_invalid_object_fields(ctx, var); -+ } - - if ((func = hlsl_get_first_func_decl(ctx, var->name))) - { -@@ -2576,11 +2628,19 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - unsigned int size, k; - - is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) -- || (var->storage_modifiers & HLSL_STORAGE_UNIFORM); -+ || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ || ctx->cur_scope->annotations; - - if (is_default_values_initializer) - { -- assert(!var->default_values); -+ /* Default values might have been allocated already for another variable of the same name, -+ in the same scope. 
*/ -+ if (var->default_values) -+ { -+ free_parse_variable_def(v); -+ continue; -+ } -+ - if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) - { - free_parse_variable_def(v); -@@ -4233,6 +4293,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, - const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) - { -+ unsigned int sampler_dim = hlsl_sampler_dim_count(dim); - struct hlsl_resource_load_params load_params = { 0 }; - const struct hlsl_type *sampler_type; - struct hlsl_ir_node *coords, *sample; -@@ -4244,11 +4305,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - } - -- if (params->args_count == 4) -- { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); -- } -- - sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_SAMPLER - || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) -@@ -4272,12 +4328,12 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - else - load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - -- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) -+ if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) - return false; - hlsl_block_add_instr(params->instrs, c); - -- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, -- hlsl_sampler_dim_count(dim)), loc))) -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, c, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - { - return false; - } -@@ -4304,14 +4360,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (hlsl_version_ge(ctx, 4, 0)) - { -- unsigned int count = hlsl_sampler_dim_count(dim); - struct hlsl_ir_node *divisor; - -- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) -+ if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) - return false; - hlsl_block_add_instr(params->instrs, divisor); - -- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) -+ if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) - return false; - hlsl_block_add_instr(params->instrs, coords); - -@@ -4325,12 +4380,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; - } - } -+ else if (params->args_count == 4) /* Gradient sampling. 
*/ -+ { -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; -+ } - else - { - load_params.type = HLSL_RESOURCE_SAMPLE; - - if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - { - return false; - } -@@ -4386,6 +4463,12 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); - } - -+static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); -+} -+ - static bool intrinsic_tex2D(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4398,6 +4481,12 @@ static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); - } - -+static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); -+} -+ - static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4416,6 +4505,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); - } - -+static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); -+} -+ - static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4428,6 +4523,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); - } - -+static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); -+} -+ - static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4617,13 +4718,17 @@ intrinsic_functions[] = - {"tan", 1, true, intrinsic_tan}, - {"tanh", 1, true, intrinsic_tanh}, - {"tex1D", -1, false, intrinsic_tex1D}, -+ {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, - {"tex2D", -1, false, intrinsic_tex2D}, - {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, -+ {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, - {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, - {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, - {"tex3D", -1, false, intrinsic_tex3D}, -+ {"tex3Dgrad", 4, false, 
intrinsic_tex3Dgrad}, - {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, - {"texCUBE", -1, false, intrinsic_texCUBE}, -+ {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, - {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, - {"transpose", 1, true, intrinsic_transpose}, - {"trunc", 1, true, intrinsic_trunc}, -@@ -5693,6 +5798,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_BREAK - %token KW_BUFFER - %token KW_CASE -+%token KW_CONSTANTBUFFER - %token KW_CBUFFER - %token KW_CENTROID - %token KW_COLUMN_MAJOR -@@ -5883,6 +5989,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - - %type if_body - -+%type array -+ - %type var_modifiers - - %type any_identifier -@@ -5954,19 +6062,31 @@ pass: - - annotations_list: - variables_def_typed ';' -+ { -+ struct hlsl_block *block; -+ -+ block = initialize_vars(ctx, $1); -+ destroy_block(block); -+ } - | annotations_list variables_def_typed ';' -+ { -+ struct hlsl_block *block; -+ -+ block = initialize_vars(ctx, $2); -+ destroy_block(block); -+ } - - annotations_opt: - %empty - { - $$ = NULL; - } -- | '<' scope_start '>' -+ | '<' annotations_scope_start '>' - { - hlsl_pop_scope(ctx); - $$ = NULL; - } -- | '<' scope_start annotations_list '>' -+ | '<' annotations_scope_start annotations_list '>' - { - struct hlsl_scope *scope = ctx->cur_scope; - -@@ -6494,6 +6614,13 @@ switch_scope_start: - ctx->cur_scope->_switch = true; - } - -+annotations_scope_start: -+ %empty -+ { -+ hlsl_push_scope(ctx); -+ ctx->cur_scope->annotations = true; -+ } -+ - var_identifier: - VAR_IDENTIFIER - | NEW_IDENTIFIER -@@ -6545,22 +6672,34 @@ register_reservation: - ':' KW_REGISTER '(' any_identifier ')' - { - memset(&$$, 0, sizeof($$)); -- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $4, 0, &$$)) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $4); - - vkd3d_free($4); - } -+ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' -+ { -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) -+ { -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ } -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' - { - memset(&$$, 0, sizeof($$)); -- if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ if (parse_reservation_index(ctx, $6, 0, &$$)) - { - hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - } - else if (parse_reservation_space($6, &$$.reg_space)) - { -- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $4, 0, &$$)) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $4); - } -@@ -6573,12 +6712,45 @@ register_reservation: - vkd3d_free($4); - vkd3d_free($6); - } -+ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' -+ { -+ memset(&$$, 0, sizeof($$)); -+ -+ if (!parse_reservation_space($9, &$$.reg_space)) -+ hlsl_error(ctx, &@9, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $9); -+ -+ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) -+ { -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ } -+ -+ 
vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($9); -+ } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ } -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($8); -+ } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' - { - hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - - memset(&$$, 0, sizeof($$)); -- if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $6, 0, &$$)) - hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $6); - -@@ -6590,6 +6762,26 @@ register_reservation: - vkd3d_free($6); - vkd3d_free($8); - } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ } -+ -+ if (!parse_reservation_space($11, &$$.reg_space)) -+ hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $11); -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($8); -+ vkd3d_free($11); -+ } - - packoffset_reservation: - ':' KW_PACKOFFSET '(' any_identifier ')' -@@ -6962,6 +7154,10 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); - } -+ | KW_DEPTHSTENCILSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); -+ } - | KW_DEPTHSTENCILVIEW - { - $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); -@@ -6974,6 +7170,17 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); - } -+ | KW_CONSTANTBUFFER '<' type '>' -+ { -+ if ($3->class != HLSL_CLASS_STRUCT) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "ConstantBuffer<...> requires user-defined structure type."); -+ $$ = hlsl_new_cb_type(ctx, $3); -+ } -+ | KW_RASTERIZERSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); -+ } - - type: - type_no_void -@@ -7258,52 +7465,43 @@ variable_def_typed: - $$->modifiers_loc = @1; - } - --arrays: -- %empty -+array: -+ '[' ']' - { -- $$.sizes = NULL; -- $$.count = 0; -+ $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; - } -- | '[' expr ']' arrays -+ | '[' expr ']' - { -- uint32_t *new_array; -- unsigned int size; -- -- size = evaluate_static_expression_as_uint(ctx, $2, &@2); -- -- destroy_block($2); -- -- $$ = $4; -+ $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); - -- if (!size) -+ if (!$$) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, - "Array size is not a positive integer constant."); -- vkd3d_free($$.sizes); - YYABORT; - } - -- if (size > 65536) -+ if ($$ > 65536) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, -- "Array size %u is not between 1 and 65536.", size); -- vkd3d_free($$.sizes); -+ "Array size %u is not between 1 and 65536.", $$); - YYABORT; - } - -- if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * 
sizeof(*new_array)))) -- { -- vkd3d_free($$.sizes); -- YYABORT; -- } -- $$.sizes = new_array; -- $$.sizes[$$.count++] = size; -+ destroy_block($2); -+ } -+ -+arrays: -+ %empty -+ { -+ $$.sizes = NULL; -+ $$.count = 0; - } -- | '[' ']' arrays -+ | array arrays - { - uint32_t *new_array; - -- $$ = $3; -+ $$ = $2; - - if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) - { -@@ -7312,7 +7510,7 @@ arrays: - } - - $$.sizes = new_array; -- $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; -+ $$.sizes[$$.count++] = $1; - } - - var_modifiers: -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 36270b159a5..33845b0d4bf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1631,9 +1631,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -@@ -1643,6 +1645,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_ARRAY: - case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_CONSTANT_BUFFER: - /* FIXME: Actually we shouldn't even get here, but we don't split - * matrices yet. */ - return false; -@@ -2562,11 +2565,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - case HLSL_RESOURCE_RESINFO: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: -- case HLSL_RESOURCE_SAMPLE_GRAD: - case HLSL_RESOURCE_SAMPLE_INFO: - return false; - - case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_GRAD: - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_PROJ: -@@ -4598,6 +4601,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) - - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ struct register_allocator allocator_used = {0}; - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -@@ -4606,6 +4610,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; - - if (!var->is_uniform || reg_size == 0) - continue; -@@ -4618,12 +4623,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - assert(reg_size % 4 == 0); - for (i = 0; i < reg_size / 4; ++i) - { -- if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ if (i < bind_count) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -- "Overlapping register() reservations on 'c%u'.", reg_idx + i); -+ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Overlapping register() reservations on 'c%u'.", reg_idx + i); -+ } -+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); - } -- - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); - } - -@@ 
-4636,6 +4644,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - } - } - -+ vkd3d_free(allocator_used.allocations); -+ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; -@@ -4777,7 +4787,7 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 - - LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) - { -- if (buffer->used_size && buffer->reservation.reg_type == 'b' -+ if (buffer->reservation.reg_type == 'b' - && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) - return buffer; - } -@@ -4925,6 +4935,14 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) - } - } - -+static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) -+{ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ return UINT_MAX; -+ -+ return 13; -+} -+ - static void allocate_buffers(struct hlsl_ctx *ctx) - { - struct hlsl_buffer *buffer; -@@ -4956,6 +4974,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - { - const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, - reservation->reg_space, reservation->reg_index); -+ unsigned int max_index = get_max_cbuffer_reg_index(ctx); -+ -+ if (buffer->reservation.reg_index > max_index) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Buffer reservation cb%u exceeds target's maximum (cb%u).", -+ buffer->reservation.reg_index, max_index); - - if (reserved_buffer && reserved_buffer != buffer) - { -@@ -4980,9 +5004,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - else if (!reservation->reg_type) - { -+ unsigned int max_index = get_max_cbuffer_reg_index(ctx); - while (get_reserved_buffer(ctx, 0, index)) - ++index; - -+ if (index > max_index) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Too many buffers allocated, target's maximum is %u.", max_index); -+ - buffer->reg.space = 0; - buffer->reg.index = index; - if (hlsl_version_ge(ctx, 5, 1)) -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 5f99be632f2..e5432cb35ce 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -1899,13 +1899,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - ins->src_count = 3; - break; - -+ case VKD3DSIH_TEXLDD: -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ memset(srcs, 0, sizeof(*srcs) * 5); -+ -+ ins->opcode = VKD3DSIH_SAMPLE_GRAD; -+ -+ srcs[0] = ins->src[0]; -+ -+ srcs[1].reg.type = VKD3DSPR_RESOURCE; -+ srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; -+ srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; -+ srcs[1].reg.idx_count = 2; -+ srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; -+ srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ srcs[2].reg.type = VKD3DSPR_SAMPLER; -+ srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; -+ srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; -+ srcs[2].reg.idx_count = 2; -+ srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; -+ -+ srcs[3] = ins->src[2]; -+ srcs[4] = ins->src[3]; -+ -+ ins->src = srcs; -+ ins->src_count = 5; -+ break; -+ - case VKD3DSIH_TEXBEM: - case VKD3DSIH_TEXBEML: - case VKD3DSIH_TEXCOORD: - case VKD3DSIH_TEXDEPTH: - case VKD3DSIH_TEXDP3: - case VKD3DSIH_TEXDP3TEX: -- case VKD3DSIH_TEXLDD: - case VKD3DSIH_TEXLDL: - 
case VKD3DSIH_TEXM3x2PAD: - case VKD3DSIH_TEXM3x2TEX: -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 24206ae9a4d..ca7cdfd5217 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2984,11 +2984,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - return D3D_SVC_VECTOR; - - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -2997,6 +2999,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - vkd3d_unreachable(); -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 6c463be8d60..2354938c08d 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2034,11 +2034,12 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) - { - queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT -- | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT -- | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT - | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; - if (device->vk_info.geometry_shaders) - queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; -+ if (device->vk_info.tessellation_shaders) -+ queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT -+ | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; - } - if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) - queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 674e46fe5c5..2bbc170504e 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1644,6 +1644,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; - vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; -+ vulkan_info->tessellation_shaders = physical_device_info->features2.features.tessellationShader; - vulkan_info->sparse_binding = features->sparseBinding; - vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index a55e967cdfc..7acd39d65be 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -147,6 +147,7 @@ struct vkd3d_vulkan_info - bool rasterization_stream; - bool transform_feedback_queries; - bool geometry_shaders; -+ bool tessellation_shaders; - - bool uav_read_without_format; - --- -2.43.0 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch deleted file mode 100644 index 851a2cc0..00000000 --- 
a/patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch +++ /dev/null @@ -1,1640 +0,0 @@ -From 6552add6f3a8441f49b6b1e74c169ce625c3c5dd Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 10 Jul 2024 07:30:57 +1000 -Subject: [PATCH] Updated vkd3d to 746222b349e9c009ed270fb5ca400497dfb43709. - ---- - libs/vkd3d/include/private/vkd3d_common.h | 4 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 507 ++++++++++-------- - libs/vkd3d/libs/vkd3d-shader/fx.c | 67 +++ - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 34 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 59 ++ - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 139 ++++- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - 7 files changed, 577 insertions(+), 234 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index 2d950b4f7aa..e7b25602ec0 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -52,6 +52,10 @@ - ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ - | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) - -+#define VKD3D_EXPAND(x) x -+#define VKD3D_STRINGIFY(x) #x -+#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) -+ - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') - #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 2482efc55d2..ea9fe77532d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - return ret; - } - --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -+bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) - { - unsigned int i; - -@@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - unsigned int offset; - } - register_table[] = - { -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, -- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- -- 
{"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, -+ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) - && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type -- && ctx->profile->major_version == register_table[i].major_version) -+ && version->type == register_table[i].shader_type -+ && version->major == register_table[i].major_version) - { - *type = register_table[i].type; -- if (register_table[i].type == D3DSPR_MISCTYPE || 
register_table[i].type == D3DSPR_RASTOUT) -+ if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) - *reg = register_table[i].offset; - else -- *reg = semantic->index; -+ *reg = semantic_index; - return true; - } - } -@@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - return false; - } - --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -+bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) - { - static const struct - { -@@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { -- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) -+ if (!ascii_strcasecmp(semantic_name, semantics[i].name)) - { - *usage = semantics[i].usage; -- *usage_idx = semantic->index; -+ *usage_idx = semantic_index; - return true; - } - } -@@ -1465,6 +1466,17 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU - return false; - } - -+struct d3dbc_compiler -+{ -+ struct vsir_program *program; -+ struct vkd3d_bytecode_buffer buffer; -+ struct vkd3d_shader_message_context *message_context; -+ -+ /* OBJECTIVE: Store all the required information in the other fields so -+ * that this hlsl_ctx is no longer necessary. */ -+ struct hlsl_ctx *ctx; -+}; -+ - static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) - { - if (type == VKD3D_SHADER_TYPE_VERTEX) -@@ -1683,8 +1695,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) - list_move_tail(&ctx->extern_vars, &sorted); - } - --static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- struct hlsl_ir_function_decl *entry_func) -+void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) - { - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; -@@ -1840,7 +1851,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); - } - --static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -+static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) - { - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -@@ -1853,7 +1864,7 @@ struct sm1_instruction - - struct sm1_dst_register - { -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; -@@ -1861,7 +1872,7 @@ struct sm1_instruction - - struct sm1_src_register - { -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; -@@ -1871,6 +1882,32 @@ struct sm1_instruction - unsigned int has_dst; - }; - -+static bool is_inconsequential_instr(const struct sm1_instruction *instr) -+{ -+ const struct sm1_src_register *src = &instr->srcs[0]; -+ const struct sm1_dst_register *dst = &instr->dst; -+ unsigned int i; -+ -+ if (instr->opcode != D3DSIO_MOV) -+ return false; -+ if (dst->mod != D3DSPDM_NONE) -+ return false; -+ if (src->mod != D3DSPSM_NONE) -+ return false; -+ 
if (src->type != dst->type) -+ return false; -+ if (src->reg != dst->reg) -+ return false; -+ -+ for (i = 0; i < 4; ++i) -+ { -+ if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) -+ return false; -+ } -+ -+ return true; -+} -+ - static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) - { - assert(reg->writemask); -@@ -1883,15 +1920,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); - } - --static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct sm1_instruction *instr) -+static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - uint32_t token = instr->opcode; - unsigned int i; - -+ if (is_inconsequential_instr(instr)) -+ return; -+ - token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); - -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -1907,54 +1948,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); - } - --static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, -- const struct hlsl_reg *src3) -+static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) - { - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, -- .srcs[2].type = D3DSPR_TEMP, -+ .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, -- const struct hlsl_reg *src2, const struct hlsl_reg *src3) -+static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = 
hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, -- .srcs[2].type = D3DSPR_TEMP, -+ .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, -@@ -1963,26 +2003,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, -@@ -1990,49 +2029,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -+static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const 
struct hlsl_reg *src, -+ D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, -@@ -2040,16 +2078,16 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - - /* Narrowing casts were already lowered. */ - assert(src_type->dimx == dst_type->dimx); -@@ -2066,7 +2104,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_DOUBLE: -@@ -2090,7 +2128,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_BOOL: -@@ -2119,8 +2157,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - --static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) -@@ -2129,12 +2170,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { -- .type = D3DSPR_CONST, -+ .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = constant_reg->index, - }; - -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2144,32 +2185,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - } - } - --static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_var *var, bool output) -+static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, -+ const struct signature_element *element, bool output) - { -+ const struct vkd3d_shader_version *version = 
&d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - -- if ((!output && !var->last_read) || (output && !var->first_write)) -- return; -- -- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) -+ if (hlsl_sm1_register_from_semantic(version, element->semantic_name, -+ element->semantic_index, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { -- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); -+ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); - assert(ret); -- reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; -- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; -+ reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ reg.reg = element->register_index; - } - - token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2178,39 +2219,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - -- reg.writemask = (1 << var->data_type->dimx) - 1; -+ reg.writemask = element->mask; - write_sm1_dst_register(buffer, ®); - } - --static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) - { -+ struct vsir_program *program = d3dbc->program; -+ const struct vkd3d_shader_version *version; - bool write_in = false, write_out = false; -- struct hlsl_ir_var *var; - -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) -+ version = &program->shader_version; -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) - write_in = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) - write_in = write_out = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) - write_in = true; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ if (write_in) -+ { -+ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); -+ } -+ -+ if (write_out) - { -- if (write_in && var->is_input_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, false); -- if (write_out && var->is_output_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, true); -+ for (unsigned int i = 0; i < program->output_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); - } - } - --static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, - unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, res_type = 0; - - token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token 
|= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2237,20 +2286,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu - token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; - put_u32(buffer, token); - -- reg.type = D3DSPR_SAMPLER; -+ reg.type = VKD3DSPR_COMBINED_SAMPLER; - reg.writemask = VKD3DSP_WRITEMASK_ALL; - reg.reg = reg_id; - - write_sm1_dst_register(buffer, ®); - } - --static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - enum hlsl_sampler_dim sampler_dim; - unsigned int i, count, reg_id; - struct hlsl_ir_var *var; - -- if (ctx->profile->major_version < 2) -+ if (version->major < 2) - return; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -@@ -2273,26 +2324,25 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - } - - reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; -- write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); -+ d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); - } - } - } - } - --static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_CONST, -+ .srcs[0].type = VKD3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, -@@ -2301,10 +2351,10 @@ static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - assert(instr->reg.allocated); - assert(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) - { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -@@ -2317,28 +2367,30 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); -- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); -+ d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); - } - } - --static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; -+ struct hlsl_ctx 
*ctx = d3dbc->ctx; - - assert(instr->reg.allocated); - - if (expr->op == HLSL_OP1_REINTERPRET) - { -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - return; - } - - if (expr->op == HLSL_OP1_CAST) - { -- write_sm1_cast(ctx, buffer, instr); -+ d3dbc_write_cast(d3dbc, instr); - return; - } - -@@ -2352,70 +2404,70 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - switch (expr->op) - { - case HLSL_OP1_ABS: -- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSX: -- write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSY: -- write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_LOG2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); - break; - - case HLSL_OP1_NEG: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); - break; - - case HLSL_OP2_ADD: -- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_dot(d3dbc, 
D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: -@@ -2424,27 +2476,27 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - break; - - case HLSL_OP2_LOGIC_AND: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_LOGIC_OR: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_SLT: -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) - hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); -- write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP3_CMP: -- if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ if (version->type == VKD3D_SHADER_TYPE_VERTEX) - hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); -- write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_DP2ADD: -- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: -@@ -2453,10 +2505,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - --static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block); -+static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); - --static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_if *iff = hlsl_ir_if(instr); - const struct hlsl_ir_node *condition; -@@ -2470,33 +2521,33 @@ static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - .opcode = D3DSIO_IFC, - .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[0].reg = condition->reg.id, - .srcs[0].mod = 0, - -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[1].reg = condition->reg.id, - .srcs[1].mod = D3DSPSM_NEG, - - .src_count = 2, - }; -- write_sm1_instruction(ctx, buffer, &sm1_ifc); -- write_sm1_block(ctx, buffer, &iff->then_block); -+ d3dbc_write_instruction(d3dbc, &sm1_ifc); -+ d3dbc_write_block(d3dbc, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; -- write_sm1_instruction(ctx, buffer, &sm1_else); -- write_sm1_block(ctx, buffer, &iff->else_block); -+ d3dbc_write_instruction(d3dbc, &sm1_else); -+ d3dbc_write_block(d3dbc, &iff->else_block); - } - - sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; -- write_sm1_instruction(ctx, buffer, &sm1_endif); -+ d3dbc_write_instruction(d3dbc, &sm1_endif); - } - --static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - -@@ -2510,35 +2561,36 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - { - .opcode = D3DSIO_TEXKILL, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg->id, - .dst.writemask = reg->writemask, - .has_dst = 1, - }; - -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - break; - } - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - } - } - --static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, -@@ -2549,15 +2601,15 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - if (load->src.var->is_uniform) - { - assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_CONST; -+ sm1_instr.srcs[0].type = VKD3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { -- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, -- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) -+ if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, -+ load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_INPUT; -+ sm1_instr.srcs[0].type = VKD3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - 
else -@@ -2565,17 +2617,17 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - struct hlsl_ir_node *coords = load->coords.node; - struct hlsl_ir_node *ddx = load->ddx.node; - struct hlsl_ir_node *ddy = load->ddy.node; - unsigned int sampler_offset, reg_id; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - struct sm1_instruction sm1_instr; - - sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); -@@ -2583,16 +2635,16 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - sm1_instr = (struct sm1_instruction) - { -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = coords->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), - -- .srcs[1].type = D3DSPR_SAMPLER, -+ .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, - .srcs[1].reg = reg_id, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), - -@@ -2636,25 +2688,26 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - assert(instr->reg.allocated); - -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct hlsl_ir_store *store = hlsl_ir_store(instr); -- const struct hlsl_ir_node *rhs = store->rhs.node; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); -+ const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, -@@ -2668,16 +2721,16 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - - if (store->lhs.var->is_output_semantic) - { -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) - { -- sm1_instr.dst.type = D3DSPR_TEMP; -+ sm1_instr.dst.type = VKD3DSPR_TEMP; - sm1_instr.dst.reg = 0; - } -- else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, -- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) -+ else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, -+ store->lhs.var->semantic.index, true, 
&sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - assert(reg.allocated); -- sm1_instr.dst.type = D3DSPR_OUTPUT; -+ sm1_instr.dst.type = VKD3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else -@@ -2687,11 +2740,10 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - assert(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; -@@ -2699,12 +2751,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), -@@ -2714,12 +2766,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - assert(instr->reg.allocated); - assert(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block) -+static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) - { -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -2739,38 +2791,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: -- write_sm1_constant(ctx, buffer, instr); -+ d3dbc_write_constant(d3dbc, instr); - break; - - case HLSL_IR_EXPR: -- write_sm1_expr(ctx, buffer, instr); -+ d3dbc_write_expr(d3dbc, instr); - break; - - case HLSL_IR_IF: - if (hlsl_version_ge(ctx, 2, 1)) -- write_sm1_if(ctx, buffer, instr); -+ d3dbc_write_if(d3dbc, instr); - else - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - break; - - case HLSL_IR_JUMP: -- write_sm1_jump(ctx, buffer, instr); -+ d3dbc_write_jump(d3dbc, instr); - break; - - case HLSL_IR_LOAD: -- write_sm1_load(ctx, buffer, instr); -+ d3dbc_write_load(d3dbc, instr); - break; - - case HLSL_IR_RESOURCE_LOAD: -- write_sm1_resource_load(ctx, buffer, instr); -+ d3dbc_write_resource_load(d3dbc, instr); - break; - - case HLSL_IR_STORE: -- write_sm1_store(ctx, buffer, instr); -+ d3dbc_write_store(d3dbc, instr); - break; - - case HLSL_IR_SWIZZLE: -- write_sm1_swizzle(ctx, buffer, instr); -+ d3dbc_write_swizzle(d3dbc, instr); - break; - - default: -@@ -2779,32 +2831,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - } - } - --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -+ * data from the 
other parameters instead, so it can be removed as an argument -+ * and be declared in vkd3d_shader_private.h and used without relying on HLSL -+ * IR structs. */ -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- struct vkd3d_bytecode_buffer buffer = {0}; -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ struct d3dbc_compiler d3dbc = {0}; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; -+ -+ d3dbc.ctx = ctx; -+ d3dbc.program = program; -+ d3dbc.message_context = message_context; - -- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -+ put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - -- write_sm1_uniforms(ctx, &buffer, entry_func); -+ bytecode_put_bytes(buffer, ctab->code, ctab->size); - -- write_sm1_constant_defs(ctx, &buffer); -- write_sm1_semantic_dcls(ctx, &buffer); -- write_sm1_sampler_dcls(ctx, &buffer); -- write_sm1_block(ctx, &buffer, &entry_func->body); -+ d3dbc_write_constant_defs(&d3dbc); -+ d3dbc_write_semantic_dcls(&d3dbc); -+ d3dbc_write_sampler_dcls(&d3dbc); -+ d3dbc_write_block(&d3dbc, &entry_func->body); - -- put_u32(&buffer, D3DSIO_END); -+ put_u32(buffer, D3DSIO_END); - -- if (buffer.status) -- ctx->result = buffer.status; -+ if (buffer->status) -+ ctx->result = buffer->status; - - if (!ctx->result) - { -- out->code = buffer.data; -- out->size = buffer.size; -+ out->code = buffer->data; -+ out->size = buffer->size; - } - else - { -- vkd3d_free(buffer.data); -+ vkd3d_free(buffer->data); - } - return ctx->result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 75f10a18253..0857ebb65d5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -56,6 +56,70 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) - vkd3d_free(string_entry); - } - -+struct state_block_function_info -+{ -+ const char *name; -+ unsigned int min_args, max_args; -+}; -+ -+static const struct state_block_function_info *get_state_block_function_info(const char *name) -+{ -+ static const struct state_block_function_info valid_functions[] = -+ { -+ {"SetBlendState", 3, 3}, -+ {"SetDepthStencilState", 2, 2}, -+ {"SetRasterizerState", 1, 1}, -+ {"SetVertexShader", 1, 1}, -+ {"SetDomainShader", 1, 1}, -+ {"SetHullShader", 1, 1}, -+ {"SetGeometryShader", 1, 1}, -+ {"SetPixelShader", 1, 1}, -+ {"SetComputeShader", 1, 1}, -+ {"OMSetRenderTargets", 2, 9}, -+ }; -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE(valid_functions); ++i) -+ { -+ if (!strcmp(name, valid_functions[i].name)) -+ return &valid_functions[i]; -+ } -+ return NULL; -+} -+ -+bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, -+ const struct vkd3d_shader_location *loc) -+{ -+ if (entry->is_function_call) -+ { -+ const struct state_block_function_info *info = get_state_block_function_info(entry->name); -+ -+ if (!info) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid state block function '%s'.", entry->name); -+ return false; -+ } -+ if (entry->args_count < info->min_args || entry->args_count > info->max_args) -+ { -+ if (info->min_args == info->max_args) -+ { 
-+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid argument count for state block function '%s' (expected %u).", -+ entry->name, info->min_args); -+ } -+ else -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid argument count for state block function '%s' (expected from %u to %u).", -+ entry->name, info->min_args, info->max_args); -+ } -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - struct fx_write_context; - - struct fx_write_context_ops -@@ -1290,6 +1354,9 @@ static bool state_block_contains_state(const char *name, unsigned int start, str - - for (i = start; i < block->count; ++i) - { -+ if (block->entries[i]->is_function_call) -+ continue; -+ - if (!ascii_strcasecmp(block->entries[i]->name, name)) - return true; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 3e0d55a7f7d..a79d2b064cf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -507,22 +507,31 @@ struct hlsl_ir_var - * name[lhs_index] = args[0] - * - or - - * name[lhs_index] = {args[0], args[1], ...}; -+ * -+ * This struct also represents function call syntax: -+ * name(args[0], args[1], ...) - */ - struct hlsl_state_block_entry - { -- /* For assignments, the name in the lhs. */ -+ /* Whether this entry is a function call. */ -+ bool is_function_call; -+ -+ /* For assignments, the name in the lhs. -+ * For functions, the name of the function. */ - char *name; - /* Resolved format-specific property identifier. */ - unsigned int name_id; - -- /* Whether the lhs in the assignment is indexed and, in that case, its index. */ -+ /* For assignments, whether the lhs of an assignment is indexed and, in -+ * that case, its index. */ - bool lhs_has_index; - unsigned int lhs_index; - -- /* Instructions present in the rhs. */ -+ /* Instructions present in the rhs or the function arguments. */ - struct hlsl_block *instrs; - -- /* For assignments, arguments of the rhs initializer. */ -+ /* For assignments, arguments of the rhs initializer. -+ * For function calls, the arguments themselves. 
*/ - struct hlsl_src *args; - unsigned int args_count; - }; -@@ -1284,6 +1293,9 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); - void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); - -+bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, -+ const struct vkd3d_shader_location *loc); -+ - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); -@@ -1469,10 +1481,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); - D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); -+bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -+bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); -+ -+void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - - bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index a02692399f7..ed6b41bf403 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -3614,6 +3614,34 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); - } - -+static bool intrinsic_faceforward(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s faceforward(%s n, %s i, %s ng)\n" -+ "{\n" -+ " return dot(i, ng) < 0 ? 
n : -n;\n" -+ "}\n"; -+ -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ return false; -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "faceforward", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_floor(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4687,6 +4715,7 @@ intrinsic_functions[] = - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, -+ {"faceforward", 3, true, intrinsic_faceforward}, - {"floor", 1, true, intrinsic_floor}, - {"fmod", 2, true, intrinsic_fmod}, - {"frac", 1, true, intrinsic_frac}, -@@ -6856,6 +6885,8 @@ parameter: - } - type = hlsl_new_array_type(ctx, type, $4.sizes[i]); - } -+ vkd3d_free($4.sizes); -+ - $$.type = type; - - if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -@@ -7377,6 +7408,34 @@ state_block: - hlsl_src_from_node(&entry->args[i], $5.args[i]); - vkd3d_free($5.args); - -+ $$ = $1; -+ state_block_add_entry($$, entry); -+ } -+ | state_block any_identifier '(' func_arguments ')' ';' -+ { -+ struct hlsl_state_block_entry *entry; -+ unsigned int i; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ YYABORT; -+ -+ entry->is_function_call = true; -+ -+ entry->name = $2; -+ entry->lhs_has_index = false; -+ entry->lhs_index = 0; -+ -+ entry->instrs = $4.instrs; -+ -+ entry->args_count = $4.args_count; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ YYABORT; -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_from_node(&entry->args[i], $4.args[i]); -+ vkd3d_free($4.args); -+ -+ hlsl_validate_state_block_entry(ctx, entry, &@4); -+ - $$ = $1; - state_block_add_entry($$, entry); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 33845b0d4bf..f9f5c8ed58a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -4716,7 +4716,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - if (ctx->profile->major_version < 4) - { -- D3DSHADER_PARAM_REGISTER_TYPE sm1_type; -+ struct vkd3d_shader_version version; - D3DDECLUSAGE usage; - uint32_t usage_idx; - -@@ -4724,8 +4724,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - return; - -- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, ®); -- if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ builtin = hlsl_sm1_register_from_semantic(&version, -+ var->semantic.name, var->semantic.index, output, &type, ®); -+ if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -4734,7 +4738,6 @@ static void allocate_semantic_register(struct 
hlsl_ctx *ctx, struct hlsl_ir_var - - if ((!output && !var->last_read) || (output && !var->first_write)) - return; -- type = (enum vkd3d_shader_register_type)sm1_type; - } - else - { -@@ -5524,6 +5527,114 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - } while (progress); - } - -+static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, -+ struct vsir_program *program, bool output, struct hlsl_ir_var *var) -+{ -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ enum vkd3d_shader_register_type type; -+ struct shader_signature *signature; -+ struct signature_element *element; -+ unsigned int register_index, mask; -+ -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ -+ if (output) -+ signature = &program->output_signature; -+ else -+ signature = &program->input_signature; -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element = &signature->elements[signature->element_count++]; -+ -+ if (!hlsl_sm1_register_from_semantic(&program->shader_version, -+ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -+ { -+ unsigned int usage, usage_idx; -+ bool ret; -+ -+ register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ -+ ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -+ assert(ret); -+ /* With the exception of vertex POSITION output, none of these are -+ * system values. Pixel POSITION input is not equivalent to -+ * SV_Position; the closer equivalent is VPOS, which is not declared -+ * as a semantic. */ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ } -+ mask = (1 << var->data_type->dimx) - 1; -+ -+ memset(element, 0, sizeof(*element)); -+ if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) -+ { -+ --signature->element_count; -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element->semantic_index = var->semantic.index; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ element->target_location = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = mask; -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -+ element->interpolation_mode = VKD3DSIM_LINEAR; -+} -+ -+static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) -+{ -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_input_semantic) -+ sm1_generate_vsir_signature_entry(ctx, program, false, var); -+ if (var->is_output_semantic) -+ sm1_generate_vsir_signature_entry(ctx, program, true, var); -+ } -+} -+ -+/* OBJECTIVE: Translate all the information from ctx and entry_func to the -+ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -+ * without relying on ctx and entry_func. 
*/
-+static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
-+        uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab)
-+{
-+    struct vkd3d_shader_version version = {0};
-+    struct vkd3d_bytecode_buffer buffer = {0};
-+
-+    version.major = ctx->profile->major_version;
-+    version.minor = ctx->profile->minor_version;
-+    version.type = ctx->profile->type;
-+    if (!vsir_program_init(program, &version, 0))
-+    {
-+        ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
-+        return;
-+    }
-+
-+    write_sm1_uniforms(ctx, &buffer);
-+    if (buffer.status)
-+    {
-+        vkd3d_free(buffer.data);
-+        ctx->result = buffer.status;
-+        return;
-+    }
-+    ctab->code = buffer.data;
-+    ctab->size = buffer.size;
-+
-+    sm1_generate_vsir_signature(ctx, program);
-+}
-+
- int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
-         enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
- {
-@@ -5706,7 +5817,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
-     switch (target_type)
-     {
-         case VKD3D_SHADER_TARGET_D3D_BYTECODE:
--            return hlsl_sm1_write(ctx, entry_func, out);
-+        {
-+            uint32_t config_flags = vkd3d_shader_init_config_flags();
-+            struct vkd3d_shader_code ctab = {0};
-+            struct vsir_program program;
-+            int result;
-+
-+            sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab);
-+            if (ctx->result)
-+            {
-+                vsir_program_cleanup(&program);
-+                vkd3d_shader_free_shader_code(&ctab);
-+                return ctx->result;
-+            }
-+
-+            result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func);
-+            vsir_program_cleanup(&program);
-+            vkd3d_shader_free_shader_code(&ctab);
-+            return result;
-+        }
-
-         case VKD3D_SHADER_TARGET_DXBC_TPF:
-             return hlsl_sm4_write(ctx, entry_func, out);
-diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
-index 37e3d31c995..c79f845b675 100644
---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
-+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
-@@ -151,6 +151,7 @@ enum vkd3d_shader_error
-     VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028,
-     VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029,
-     VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030,
-+    VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031,
-
-     VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300,
-     VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301,
---
-2.43.0
-
diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch
deleted file mode 100644
index 2376e6de..00000000
--- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch
+++ /dev/null
@@ -1,960 +0,0 @@
-From e940aca803c12bbd55ebe3fb26920373a56a0fab Mon Sep 17 00:00:00 2001
-From: Alistair Leslie-Hughes
-Date: Thu, 11 Jul 2024 09:52:56 +1000
-Subject: [PATCH] Updated vkd3d to c792114a6a58c7c97abf827d154d7ecd22d81536.
- ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 4 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 13 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 197 +++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 390 +++++++++++++++--- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 29 ++ - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - libs/vkd3d/libs/vkd3d/resource.c | 6 +- - 9 files changed, 510 insertions(+), 144 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index ea9fe77532d..4522d56c5c9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -2670,11 +2670,11 @@ static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct - case HLSL_RESOURCE_SAMPLE_GRAD: - sm1_instr.opcode = D3DSIO_TEXLDD; - -- sm1_instr.srcs[2].type = D3DSPR_TEMP; -+ sm1_instr.srcs[2].type = VKD3DSPR_TEMP; - sm1_instr.srcs[2].reg = ddx->reg.id; - sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); - -- sm1_instr.srcs[3].type = D3DSPR_TEMP; -+ sm1_instr.srcs[3].type = VKD3DSPR_TEMP; - sm1_instr.srcs[3].reg = ddy->reg.id; - sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 0857ebb65d5..bd2ad1290cd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -830,7 +830,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - } - - name_offset = write_string(name, fx); -- semantic_offset = write_string(semantic->raw_name, fx); -+ semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; - - offset = put_u32(buffer, hlsl_sm1_base_type(type)); - put_u32(buffer, hlsl_sm1_class(type)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index a157590c97a..acf50869a40 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -1810,7 +1810,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - } - - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, const struct vkd3d_shader_location *loc) -+ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, -+ unsigned int unroll_limit, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_loop *loop; - -@@ -1819,6 +1820,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); - hlsl_block_init(&loop->body); - hlsl_block_add_block(&loop->body, block); -+ -+ loop->unroll_type = unroll_type; -+ loop->unroll_limit = unroll_limit; - return &loop->node; - } - -@@ -1881,9 +1885,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct - return map->instrs[i].dst; - } - -- /* The block passed to hlsl_clone_block() should have been free of external -- * references. 
*/ -- vkd3d_unreachable(); -+ return src; - } - - static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, -@@ -1980,7 +1982,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ - if (!clone_block(ctx, &body, &src->body, map)) - return NULL; - -- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) -+ if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) - { - hlsl_block_cleanup(&body); - return NULL; -@@ -2791,6 +2793,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - static const char *const op_names[] = - { - [HLSL_OP0_VOID] = "void", -+ [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", - - [HLSL_OP1_ABS] = "abs", - [HLSL_OP1_BIT_NOT] = "~", -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index a79d2b064cf..5832958712a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -602,12 +602,21 @@ struct hlsl_ir_if - struct hlsl_block else_block; - }; - -+enum hlsl_ir_loop_unroll_type -+{ -+ HLSL_IR_LOOP_UNROLL, -+ HLSL_IR_LOOP_FORCE_UNROLL, -+ HLSL_IR_LOOP_FORCE_LOOP -+}; -+ - struct hlsl_ir_loop - { - struct hlsl_ir_node node; - /* loop condition is stored in the body (as "if (!condition) break;") */ - struct hlsl_block body; - unsigned int next_index; /* liveness index of the end of the loop */ -+ unsigned int unroll_limit; -+ enum hlsl_ir_loop_unroll_type unroll_type; - }; - - struct hlsl_ir_switch_case -@@ -629,6 +638,7 @@ struct hlsl_ir_switch - enum hlsl_ir_expr_op - { - HLSL_OP0_VOID, -+ HLSL_OP0_RASTERIZER_SAMPLE_COUNT, - - HLSL_OP1_ABS, - HLSL_OP1_BIT_NOT, -@@ -1390,7 +1400,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); - struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, const struct vkd3d_shader_location *loc); -+ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index ed6b41bf403..7b058a65bc1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -573,12 +573,91 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); - } - -+static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) -+{ -+ union hlsl_constant_value_component ret = {0}; -+ struct hlsl_ir_constant *constant; -+ struct hlsl_ir_node *node; -+ struct hlsl_block expr; -+ struct hlsl_src src; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (node->type) -+ { -+ case HLSL_IR_CONSTANT: -+ case HLSL_IR_EXPR: -+ case HLSL_IR_SWIZZLE: -+ case HLSL_IR_LOAD: -+ case HLSL_IR_INDEX: -+ continue; -+ case HLSL_IR_STORE: 
-+ if (hlsl_ir_store(node)->lhs.var->is_synthetic) -+ break; -+ /* fall-through */ -+ case HLSL_IR_CALL: -+ case HLSL_IR_IF: -+ case HLSL_IR_LOOP: -+ case HLSL_IR_JUMP: -+ case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_RESOURCE_STORE: -+ case HLSL_IR_SWITCH: -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Expected literal expression."); -+ break; -+ } -+ } -+ -+ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -+ return ret; -+ hlsl_block_add_block(&expr, block); -+ -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) -+ { -+ hlsl_block_cleanup(&expr); -+ return ret; -+ } -+ -+ /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ -+ hlsl_src_from_node(&src, node_from_block(&expr)); -+ hlsl_run_const_passes(ctx, &expr); -+ node = src.node; -+ hlsl_src_remove(&src); -+ -+ if (node->type == HLSL_IR_CONSTANT) -+ { -+ constant = hlsl_ir_constant(node); -+ ret = constant->value.u[0]; -+ } -+ else -+ { -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Failed to evaluate constant expression."); -+ } -+ -+ hlsl_block_cleanup(&expr); -+ -+ return ret; -+} -+ -+static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) -+{ -+ union hlsl_constant_value_component res; -+ -+ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ return res.u; -+} -+ - static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, - struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; -+ unsigned int i, unroll_limit = 0; - struct hlsl_ir_node *loop; -- unsigned int i; - - if (attribute_list_has_duplicates(attributes)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -@@ -591,18 +670,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - const struct hlsl_attribute *attr = attributes->attrs[i]; - if (!strcmp(attr->name, "unroll")) - { -- if (attr->args_count) -+ if (attr->args_count > 1) - { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); -+ hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, -+ "Ignoring 'unroll' attribute with more than 1 argument."); -+ continue; - } -- else -+ -+ if (attr->args_count == 1) - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); -+ struct hlsl_block expr; -+ hlsl_block_init(&expr); -+ if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) -+ return NULL; -+ -+ unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); -+ hlsl_block_cleanup(&expr); - } -+ -+ unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; - } - else if (!strcmp(attr->name, "loop")) - { -- /* TODO: this attribute will be used to disable unrolling, once it's implememented. 
*/ -+ unroll_type = HLSL_IR_LOOP_FORCE_LOOP; - } - else if (!strcmp(attr->name, "fastopt") - || !strcmp(attr->name, "allow_uav_condition")) -@@ -631,7 +721,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - else - list_move_head(&body->instrs, &cond->instrs); - -- if (!(loop = hlsl_new_loop(ctx, body, loc))) -+ if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) - goto oom; - hlsl_block_add_instr(init, loop); - -@@ -1320,84 +1410,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return block; - } - --static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, -- struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) --{ -- union hlsl_constant_value_component ret = {0}; -- struct hlsl_ir_constant *constant; -- struct hlsl_ir_node *node; -- struct hlsl_block expr; -- struct hlsl_src src; -- -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -- { -- switch (node->type) -- { -- case HLSL_IR_CONSTANT: -- case HLSL_IR_EXPR: -- case HLSL_IR_SWIZZLE: -- case HLSL_IR_LOAD: -- case HLSL_IR_INDEX: -- continue; -- case HLSL_IR_STORE: -- if (hlsl_ir_store(node)->lhs.var->is_synthetic) -- break; -- /* fall-through */ -- case HLSL_IR_CALL: -- case HLSL_IR_IF: -- case HLSL_IR_LOOP: -- case HLSL_IR_JUMP: -- case HLSL_IR_RESOURCE_LOAD: -- case HLSL_IR_RESOURCE_STORE: -- case HLSL_IR_SWITCH: -- case HLSL_IR_STATEBLOCK_CONSTANT: -- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Expected literal expression."); -- break; -- } -- } -- -- if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -- return ret; -- hlsl_block_add_block(&expr, block); -- -- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) -- { -- hlsl_block_cleanup(&expr); -- return ret; -- } -- -- /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ -- hlsl_src_from_node(&src, node_from_block(&expr)); -- hlsl_run_const_passes(ctx, &expr); -- node = src.node; -- hlsl_src_remove(&src); -- -- if (node->type == HLSL_IR_CONSTANT) -- { -- constant = hlsl_ir_constant(node); -- ret = constant->value.u[0]; -- } -- else -- { -- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Failed to evaluate constant expression."); -- } -- -- hlsl_block_cleanup(&expr); -- -- return ret; --} -- --static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -- const struct vkd3d_shader_location *loc) --{ -- union hlsl_constant_value_component res; -- -- res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -- return res.u; --} -- - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { - /* Scalar vars can be converted to pretty much everything */ -@@ -4676,6 +4688,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - return true; - } - -+static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *expr; -+ -+ if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, -+ operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, expr); -+ -+ return true; -+} -+ - static const struct intrinsic_function - { - const char *name; -@@ -4688,6 +4714,7 @@ intrinsic_functions[] = - { - /* Note: these entries should be kept in alphabetical order. */ - {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, -+ {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, - {"abs", 1, true, intrinsic_abs}, - {"acos", 1, true, intrinsic_acos}, - {"all", 1, true, intrinsic_all}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index f9f5c8ed58a..7e4f168675e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1981,6 +1981,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return progress; - } - -+enum validation_result -+{ -+ DEREF_VALIDATION_OK, -+ DEREF_VALIDATION_OUT_OF_BOUNDS, -+ DEREF_VALIDATION_NOT_CONSTANT, -+}; -+ -+static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, -+ const struct hlsl_deref *deref) -+{ -+ struct hlsl_type *type = deref->var->data_type; -+ unsigned int i; -+ -+ for (i = 0; i < deref->path_len; ++i) -+ { -+ struct hlsl_ir_node *path_node = deref->path[i].node; -+ unsigned int idx = 0; -+ -+ assert(path_node); -+ if (path_node->type != HLSL_IR_CONSTANT) -+ return DEREF_VALIDATION_NOT_CONSTANT; -+ -+ /* We should always have generated a cast to UINT. */ -+ assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ if (idx >= type->dimx) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Vector index is out of bounds. 
%u/%u", idx, type->dimx); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_MATRIX: -+ if (idx >= hlsl_type_major_size(type)) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); -+ } -+ -+ return DEREF_VALIDATION_OK; -+} -+ - static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - const char *usage) - { -@@ -1998,60 +2068,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct - } - } - --static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - void *context) - { -- unsigned int start, count; -- -- if (instr->type == HLSL_IR_RESOURCE_LOAD) -+ switch (instr->type) - { -- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -- -- if (!load->resource.var->is_uniform) -+ case HLSL_IR_RESOURCE_LOAD: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Loaded resource must have a single uniform source."); -+ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -+ -+ if (!load->resource.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Loaded resource must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Loaded resource from \"%s\" must be determinable at compile time.", -+ load->resource.var->name); -+ note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); -+ } -+ -+ if (load->sampler.var) -+ { -+ if (!load->sampler.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Resource load sampler must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Resource load sampler from \"%s\" must be determinable at compile time.", -+ load->sampler.var->name); -+ note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); -+ } -+ } -+ break; - } -- else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) -+ case HLSL_IR_RESOURCE_STORE: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Loaded resource from \"%s\" must be determinable at compile time.", -- load->resource.var->name); -- note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); -- } -+ struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - -- if (load->sampler.var) -- { -- if (!load->sampler.var->is_uniform) -+ if 
(!store->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Resource load sampler must have a single uniform source."); -+ "Accessed resource must have a single uniform source."); - } -- else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) -+ else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Resource load sampler from \"%s\" must be determinable at compile time.", -- load->sampler.var->name); -- note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); -+ "Accessed resource from \"%s\" must be determinable at compile time.", -+ store->resource.var->name); -+ note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); - } -+ break; - } -- } -- else if (instr->type == HLSL_IR_RESOURCE_STORE) -- { -- struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); -- -- if (!store->resource.var->is_uniform) -+ case HLSL_IR_LOAD: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Accessed resource must have a single uniform source."); -+ struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ validate_component_index_range_from_deref(ctx, &load->src); -+ break; - } -- else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) -+ case HLSL_IR_STORE: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Accessed resource from \"%s\" must be determinable at compile time.", -- store->resource.var->name); -- note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); -+ struct hlsl_ir_store *store = hlsl_ir_store(instr); -+ validate_component_index_range_from_deref(ctx, &store->lhs); -+ break; - } -+ default: -+ break; - } - - return false; -@@ -5203,21 +5290,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - { - case HLSL_CLASS_VECTOR: - if (idx >= type->dimx) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Vector index is out of bounds. %u/%u", idx, type->dimx); - return false; -- } - *start += idx; - break; - - case HLSL_CLASS_MATRIX: - if (idx >= hlsl_type_major_size(type)) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); - return false; -- } - if (hlsl_type_is_row_major(type)) - *start += idx * type->dimx; - else -@@ -5226,11 +5305,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - - case HLSL_CLASS_ARRAY: - if (idx >= type->e.array.elements_count) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Array index is out of bounds. 
%u/%u", idx, type->e.array.elements_count); - return false; -- } - *start += idx * hlsl_type_component_count(type->e.array.type); - break; - -@@ -5635,6 +5710,222 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - sm1_generate_vsir_signature(ctx, program); - } - -+static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -+ struct hlsl_block **found_block) -+{ -+ struct hlsl_ir_node *node; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (node == stop_point) -+ return NULL; -+ -+ if (node->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(node); -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -+ return jump; -+ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -+ return jump; -+ } -+ else if (node->type == HLSL_IR_JUMP) -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); -+ -+ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) -+ { -+ *found_block = block; -+ return jump; -+ } -+ } -+ } -+ -+ return NULL; -+} -+ -+static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -+{ -+ /* Always use the explicit limit if it has been passed. */ -+ if (loop->unroll_limit) -+ return loop->unroll_limit; -+ -+ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ return 1024; -+ -+ /* SM4 limits implicit unrolling to 254 iterations. */ -+ if (hlsl_version_ge(ctx, 4, 0)) -+ return 254; -+ -+ /* SM<3 implicitly unrolls up to 1024 iterations. */ -+ return 1024; -+} -+ -+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) -+{ -+ unsigned int max_iterations, i; -+ -+ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); -+ -+ for (i = 0; i < max_iterations; ++i) -+ { -+ struct hlsl_block tmp_dst, *jump_block; -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) -+ return false; -+ list_move_before(&loop->node.entry, &tmp_dst.instrs); -+ hlsl_block_cleanup(&tmp_dst); -+ -+ hlsl_run_const_passes(ctx, block); -+ -+ if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) -+ { -+ enum hlsl_ir_jump_type type = jump->type; -+ -+ if (jump_block != loop_parent) -+ { -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -+ "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); -+ return false; -+ } -+ -+ list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); -+ hlsl_block_cleanup(&tmp_dst); -+ -+ if (type == HLSL_IR_JUMP_BREAK) -+ break; -+ } -+ } -+ -+ /* Native will not emit an error if max_iterations has been reached with an -+ * explicit limit. 
It also will not insert a loop if there are iterations left -+ * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ -+ if (!loop->unroll_limit && i == max_iterations) -+ { -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -+ "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); -+ return false; -+ } -+ -+ list_remove(&loop->node.entry); -+ hlsl_free_instr(&loop->node); -+ -+ return true; -+} -+ -+/* -+ * loop_unrolling_find_unrollable_loop() is not the normal way to do things; -+ * normal passes simply iterate over the whole block and apply a transformation -+ * to every relevant instruction. However, loop unrolling can fail, and we want -+ * to leave the loop in its previous state in that case. That isn't a problem by -+ * itself, except that loop unrolling needs copy-prop in order to work properly, -+ * and copy-prop state at the time of the loop depends on the rest of the program -+ * up to that point. This means we need to clone the whole program, and at that -+ * point we have to search it again anyway to find the clone of the loop we were -+ * going to unroll. -+ * -+ * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop -+ * up until the loop instruction, clone just that loop, then use copyprop again -+ * with the saved state after unrolling. However, copyprop currently isn't built -+ * for that yet [notably, it still relies on indices]. Note also this still doesn't -+ * really let us use transform_ir() anyway [since we don't have a good way to say -+ * "copyprop from the beginning of the program up to the instruction we're -+ * currently processing" from the callback]; we'd have to use a dedicated -+ * recursive function instead. 
*/ -+static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block **containing_block) -+{ -+ struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (instr->type) -+ { -+ case HLSL_IR_LOOP: -+ { -+ struct hlsl_ir_loop *nested_loop; -+ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -+ -+ if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) -+ return nested_loop; -+ -+ if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ { -+ *containing_block = block; -+ return loop; -+ } -+ -+ break; -+ } -+ case HLSL_IR_IF: -+ { -+ struct hlsl_ir_loop *loop; -+ struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) -+ return loop; -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) -+ return loop; -+ -+ break; -+ } -+ case HLSL_IR_SWITCH: -+ { -+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); -+ struct hlsl_ir_switch_case *c; -+ struct hlsl_ir_loop *loop; -+ -+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -+ { -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) -+ return loop; -+ } -+ -+ break; -+ } -+ default: -+ break; -+ } -+ } -+ -+ return NULL; -+} -+ -+static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) -+{ -+ while (true) -+ { -+ struct hlsl_block clone, *containing_block; -+ struct hlsl_ir_loop *loop, *cloned_loop; -+ -+ if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) -+ return; -+ -+ if (!hlsl_clone_block(ctx, &clone, block)) -+ return; -+ -+ cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); -+ assert(cloned_loop); -+ -+ if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) -+ { -+ hlsl_block_cleanup(&clone); -+ loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; -+ continue; -+ } -+ -+ hlsl_block_cleanup(block); -+ hlsl_block_init(block); -+ hlsl_block_add_block(block, &clone); -+ } -+} -+ - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -5721,6 +6012,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } - -+ transform_unroll_loops(ctx, body); - hlsl_run_const_passes(ctx, body); - - remove_unreachable_code(ctx, body); -@@ -5730,7 +6022,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - -- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, validate_dereferences, body, NULL); - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index ca7cdfd5217..a7c37215e5e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2762,6 +2762,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 
VKD3DSPR_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, -+ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) -@@ -2817,6 +2818,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -+ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, -@@ -5042,6 +5044,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - -+static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; -+ instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ - static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - const struct hlsl_ir_node *arg1 = expr->operands[0].node; -@@ -5057,6 +5078,14 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - - switch (expr->op) - { -+ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -+ if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) -+ write_sm4_rasterizer_sample_count(tpf, &expr->node); -+ else -+ hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -+ break; -+ - case HLSL_OP1_ABS: - switch (dst_type->e.numeric.type) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index c79f845b675..96e613669a6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -152,6 +152,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, - VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, - VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, -+ VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index db2d87428bb..7d7f40c0953 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -4349,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript - return hr; - - descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); -- d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); -+ if (FAILED(hr = 
d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) -+ { -+ vkd3d_private_store_destroy(&descriptor_heap->private_store); -+ return hr; -+ } - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - - d3d12_device_add_ref(descriptor_heap->device = device); --- -2.43.0 -
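Note for reviewers: the removed hunks above cover HLSL-level features handled by the superseded patch: the [unroll]/[loop]/[fastopt]/[allow_uav_condition] loop attributes with an optional unroll limit, compile-time validation of resource/variable dereferences, and the GetRenderTargetSampleCount() intrinsic (emitted via VKD3D_SM4_OP_SAMPLE_INFO and restricted to pixel shaders of version 4.1 or higher). As a rough illustration only, and not part of the patch itself, shader source exercising those code paths might look like the sketch below; the entry-point name and semantics are arbitrary.

    // Illustrative HLSL only (not part of this patch); names are arbitrary.
    float4 ps_main(float4 pos : SV_Position) : SV_Target
    {
        float sum = 0.0f;

        [unroll(4)]                    // explicit unroll limit, as carried by unroll_limit above
        for (int i = 0; i < 8; ++i)    // more iterations than the limit, as in the removed code comment
            sum += pos.x * i;

        // ps_4_1 or higher only; lowered through VKD3D_SM4_OP_SAMPLE_INFO per the hunk above.
        uint samples = GetRenderTargetSampleCount();

        return float4(sum, (float)samples, 0.0f, 1.0f);
    }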