From 12fb6d73ffbbfed1ab606ee8d4ac35005b5000cd Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 11 Jul 2024 09:54:47 +1000 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-d3ba810c98b4d2df260a527f74586a0b314.patch | 2 +- ...-ccb6150aabc7cce9e26a39366c611f5a7da.patch | 2 +- ...-3dc43e8945f68c42268b8d5e43525b9e108.patch | 2 +- ...-746222b349e9c009ed270fb5ca400497dfb.patch | 1640 +++++++++++++++++ ...-c792114a6a58c7c97abf827d154d7ecd22d.patch | 960 ++++++++++ 5 files changed, 2603 insertions(+), 3 deletions(-) create mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch create mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch index 50eaba15..9f37d446 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-d3ba810c98b4d2df260a527f74586a0b314.patch @@ -1,4 +1,4 @@ -From aba492b0cf6cda5452b16a2688de012b7908a762 Mon Sep 17 00:00:00 2001 +From bcb85270b8635f3d35b7d559c1800597589c62d1 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to d3ba810c98b4d2df260a527f74586a0b31408510. 
diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch index 30c94926..5258811b 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-ccb6150aabc7cce9e26a39366c611f5a7da.patch @@ -1,4 +1,4 @@ -From aaeb96da1ada9e18e982a17444e13305506aa097 Mon Sep 17 00:00:00 2001 +From f9d3eae7971ce35994e20546183c7147d33c1d4a Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Sun, 23 Jun 2024 15:40:43 +1000 Subject: [PATCH] Updated vkd3d to ccb6150aabc7cce9e26a39366c611f5a7da789e4. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch index aa83716d..c05fd1be 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-3dc43e8945f68c42268b8d5e43525b9e108.patch @@ -1,4 +1,4 @@ -From 15652871b2e9951be700c7baf1988ae5db09ccad Mon Sep 17 00:00:00 2001 +From 13ac90e6b53ef7af6231bec0fc2c3b5200215bf7 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 9 Jul 2024 07:22:05 +1000 Subject: [PATCH] Updated vkd3d to 3dc43e8945f68c42268b8d5e43525b9e10806f77. diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch new file mode 100644 index 00000000..851a2cc0 --- /dev/null +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-746222b349e9c009ed270fb5ca400497dfb.patch @@ -0,0 +1,1640 @@ +From 6552add6f3a8441f49b6b1e74c169ce625c3c5dd Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Wed, 10 Jul 2024 07:30:57 +1000 +Subject: [PATCH] Updated vkd3d to 746222b349e9c009ed270fb5ca400497dfb43709. 
+ +--- + libs/vkd3d/include/private/vkd3d_common.h | 4 + + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 507 ++++++++++-------- + libs/vkd3d/libs/vkd3d-shader/fx.c | 67 +++ + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 34 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 59 ++ + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 139 ++++- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + + 7 files changed, 577 insertions(+), 234 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index 2d950b4f7aa..e7b25602ec0 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -52,6 +52,10 @@ + ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ + | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) + ++#define VKD3D_EXPAND(x) x ++#define VKD3D_STRINGIFY(x) #x ++#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) ++ + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') + #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') + #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 2482efc55d2..ea9fe77532d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + return ret; + } + +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) ++bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) + { + unsigned int i; + +@@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + bool output; + enum vkd3d_shader_type shader_type; + 
unsigned int major_version; +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + unsigned int offset; + } + register_table[] = + { +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, +- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- 
{"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, ++ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 
2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) + && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type +- && ctx->profile->major_version == register_table[i].major_version) ++ && version->type == register_table[i].shader_type ++ && version->major == register_table[i].major_version) + { + *type = register_table[i].type; +- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) ++ if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) + *reg = register_table[i].offset; + else +- *reg = semantic->index; ++ *reg = semantic_index; + return true; + } + } +@@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + return false; + } + +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) ++bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) + { + static const struct + { +@@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { +- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) ++ if (!ascii_strcasecmp(semantic_name, semantics[i].name)) + { + *usage = semantics[i].usage; +- *usage_idx = semantic->index; ++ *usage_idx = semantic_index; + return true; + } + } +@@ -1465,6 +1466,17 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU + return false; + } + ++struct d3dbc_compiler ++{ ++ struct 
vsir_program *program; ++ struct vkd3d_bytecode_buffer buffer; ++ struct vkd3d_shader_message_context *message_context; ++ ++ /* OBJECTIVE: Store all the required information in the other fields so ++ * that this hlsl_ctx is no longer necessary. */ ++ struct hlsl_ctx *ctx; ++}; ++ + static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) + { + if (type == VKD3D_SHADER_TYPE_VERTEX) +@@ -1683,8 +1695,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) + list_move_tail(&ctx->extern_vars, &sorted); + } + +-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- struct hlsl_ir_function_decl *entry_func) ++void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) + { + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; +@@ -1840,7 +1851,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + } + +-static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) ++static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) + { + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +@@ -1853,7 +1864,7 @@ struct sm1_instruction + + struct sm1_dst_register + { +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; +@@ -1861,7 +1872,7 @@ struct sm1_instruction + + struct sm1_src_register + { +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; +@@ -1871,6 +1882,32 @@ struct sm1_instruction + unsigned int has_dst; + }; + ++static bool 
is_inconsequential_instr(const struct sm1_instruction *instr) ++{ ++ const struct sm1_src_register *src = &instr->srcs[0]; ++ const struct sm1_dst_register *dst = &instr->dst; ++ unsigned int i; ++ ++ if (instr->opcode != D3DSIO_MOV) ++ return false; ++ if (dst->mod != D3DSPDM_NONE) ++ return false; ++ if (src->mod != D3DSPSM_NONE) ++ return false; ++ if (src->type != dst->type) ++ return false; ++ if (src->reg != dst->reg) ++ return false; ++ ++ for (i = 0; i < 4; ++i) ++ { ++ if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) ++ return false; ++ } ++ ++ return true; ++} ++ + static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) + { + assert(reg->writemask); +@@ -1883,15 +1920,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); + } + +-static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct sm1_instruction *instr) ++static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + uint32_t token = instr->opcode; + unsigned int i; + ++ if (is_inconsequential_instr(instr)) ++ return; ++ + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -1907,54 +1948,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); + } + +-static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer 
*buffer, +- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, +- const struct hlsl_reg *src3) ++static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) + { + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].type = VKD3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, +- const struct hlsl_reg *src2, const struct hlsl_reg *src3) ++static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ 
.srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].type = VKD3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, +@@ -1963,26 +2003,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, +@@ -1990,49 +2029,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void 
write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) ++static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src, ++ D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, +@@ -2040,16 +2078,16 @@ static void 
write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + + /* Narrowing casts were already lowered. */ + assert(src_type->dimx == dst_type->dimx); +@@ -2066,7 +2104,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_TYPE_DOUBLE: +@@ -2090,7 +2128,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + break; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_TYPE_BOOL: +@@ -2119,8 +2157,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + +-static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = 
&d3dbc->buffer; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) +@@ -2129,12 +2170,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { +- .type = D3DSPR_CONST, ++ .type = VKD3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = constant_reg->index, + }; + +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2144,32 +2185,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + } + } + +-static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_var *var, bool output) ++static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, ++ const struct signature_element *element, bool output) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + +- if ((!output && !var->last_read) || (output && !var->first_write)) +- return; +- +- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) ++ if (hlsl_sm1_register_from_semantic(version, element->semantic_name, ++ element->semantic_index, output, &reg.type, &reg.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { +- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); ++ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); + assert(ret); +- reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; +- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; ++ reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ reg.reg = element->register_index; + } + + token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2178,39 +2219,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + +- reg.writemask = (1 << var->data_type->dimx) - 1; ++ reg.writemask = element->mask; + write_sm1_dst_register(buffer, &reg); + } + +-static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) + { ++ struct vsir_program *program = d3dbc->program; ++ const struct vkd3d_shader_version *version; + bool write_in = false, write_out = false; +- struct hlsl_ir_var *var; + +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) ++ version = &program->shader_version; ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) + write_in = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) ++ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) + write_in = write_out = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) ++ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) + write_in = true; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ if (write_in) ++ { ++ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) ++ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); ++ } ++ ++ if (write_out) + { +- if (write_in && var->is_input_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, false); +- if (write_out && var->is_output_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, true); ++ 
for (unsigned int i = 0; i < program->output_signature.element_count; ++i) ++ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); + } + } + +-static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2237,20 +2286,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + +- reg.type = D3DSPR_SAMPLER; ++ reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, &reg); + } + +-static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + +- if (ctx->profile->major_version < 2) ++ if (version->major < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +@@ -2273,26 +2324,25 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + } + + reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; +- write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); ++ d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); + } + } + } + } + +-static void 
write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_CONST, ++ .srcs[0].type = VKD3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, +@@ -2301,10 +2351,10 @@ static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + assert(instr->reg.allocated); + assert(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); +@@ -2317,28 +2367,30 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); +- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); ++ d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); + } + } + +-static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { ++ const struct vkd3d_shader_version *version 
= &d3dbc->program->shader_version; + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + + assert(instr->reg.allocated); + + if (expr->op == HLSL_OP1_REINTERPRET) + { +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + return; + } + + if (expr->op == HLSL_OP1_CAST) + { +- write_sm1_cast(ctx, buffer, instr); ++ d3dbc_write_cast(d3dbc, instr); + return; + } + +@@ -2352,70 +2404,70 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + switch (expr->op) + { + case HLSL_OP1_ABS: +- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSX: +- write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: +- write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 
D3DSPDM_SATURATE); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); + break; + + case HLSL_OP2_ADD: +- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MUL: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: +@@ -2424,27 +2476,27 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *b + break; + + case HLSL_OP2_LOGIC_AND: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_LOGIC_OR: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_SLT: +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) + hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); +- write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP3_CMP: +- if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ if (version->type == VKD3D_SHADER_TYPE_VERTEX) + hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); +- write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + case HLSL_OP3_DP2ADD: +- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: +@@ -2453,10 +2505,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + +-static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block); ++static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); + +-static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { 
+ const struct hlsl_ir_if *iff = hlsl_ir_if(instr); + const struct hlsl_ir_node *condition; +@@ -2470,33 +2521,33 @@ static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf + .opcode = D3DSIO_IFC, + .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[0].reg = condition->reg.id, + .srcs[0].mod = 0, + +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[1].reg = condition->reg.id, + .srcs[1].mod = D3DSPSM_NEG, + + .src_count = 2, + }; +- write_sm1_instruction(ctx, buffer, &sm1_ifc); +- write_sm1_block(ctx, buffer, &iff->then_block); ++ d3dbc_write_instruction(d3dbc, &sm1_ifc); ++ d3dbc_write_block(d3dbc, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; +- write_sm1_instruction(ctx, buffer, &sm1_else); +- write_sm1_block(ctx, buffer, &iff->else_block); ++ d3dbc_write_instruction(d3dbc, &sm1_else); ++ d3dbc_write_block(d3dbc, &iff->else_block); + } + + sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; +- write_sm1_instruction(ctx, buffer, &sm1_endif); ++ d3dbc_write_instruction(d3dbc, &sm1_endif); + } + +-static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + +@@ -2510,35 +2561,36 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + { + .opcode = D3DSIO_TEXKILL, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = reg->id, + .dst.writemask = reg->writemask, + .has_dst = 1, + }; + +- write_sm1_instruction(ctx, buffer, 
&sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + break; + } + + default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + } + } + +-static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, +@@ -2549,15 +2601,15 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + if (load->src.var->is_uniform) + { + assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_CONST; ++ sm1_instr.srcs[0].type = VKD3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { +- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, +- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) ++ if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, ++ load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { + assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_INPUT; ++ sm1_instr.srcs[0].type = VKD3DSPR_INPUT; + sm1_instr.srcs[0].reg = reg.id; + } + else +@@ -2565,17 +2617,17 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + + 
sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; + unsigned int sampler_offset, reg_id; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); +@@ -2583,16 +2635,16 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + + sm1_instr = (struct sm1_instruction) + { +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), + +- .srcs[1].type = D3DSPR_SAMPLER, ++ .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + +@@ -2636,25 +2688,26 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + + assert(instr->reg.allocated); + +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { ++ const struct vkd3d_shader_version *version = 
&d3dbc->program->shader_version; + const struct hlsl_ir_store *store = hlsl_ir_store(instr); +- const struct hlsl_ir_node *rhs = store->rhs.node; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); ++ const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, +@@ -2668,16 +2721,16 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + + if (store->lhs.var->is_output_semantic) + { +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) + { +- sm1_instr.dst.type = D3DSPR_TEMP; ++ sm1_instr.dst.type = VKD3DSPR_TEMP; + sm1_instr.dst.reg = 0; + } +- else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, +- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) ++ else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, ++ store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { + assert(reg.allocated); +- sm1_instr.dst.type = D3DSPR_OUTPUT; ++ sm1_instr.dst.type = VKD3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else +@@ -2687,11 +2740,10 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + assert(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; +@@ -2699,12 +2751,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), +@@ -2714,12 +2766,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + assert(instr->reg.allocated); + assert(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block) ++static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) + { ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +@@ -2739,38 +2791,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: +- write_sm1_constant(ctx, buffer, instr); ++ d3dbc_write_constant(d3dbc, instr); + break; + + case HLSL_IR_EXPR: +- write_sm1_expr(ctx, buffer, instr); ++ d3dbc_write_expr(d3dbc, instr); + break; + + case HLSL_IR_IF: + if (hlsl_version_ge(ctx, 2, 1)) +- write_sm1_if(ctx, buffer, instr); ++ d3dbc_write_if(d3dbc, instr); + 
else + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + break; + + case HLSL_IR_JUMP: +- write_sm1_jump(ctx, buffer, instr); ++ d3dbc_write_jump(d3dbc, instr); + break; + + case HLSL_IR_LOAD: +- write_sm1_load(ctx, buffer, instr); ++ d3dbc_write_load(d3dbc, instr); + break; + + case HLSL_IR_RESOURCE_LOAD: +- write_sm1_resource_load(ctx, buffer, instr); ++ d3dbc_write_resource_load(d3dbc, instr); + break; + + case HLSL_IR_STORE: +- write_sm1_store(ctx, buffer, instr); ++ d3dbc_write_store(d3dbc, instr); + break; + + case HLSL_IR_SWIZZLE: +- write_sm1_swizzle(ctx, buffer, instr); ++ d3dbc_write_swizzle(d3dbc, instr); + break; + + default: +@@ -2779,32 +2831,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + } + } + +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving ++ * data from the other parameters instead, so it can be removed as an argument ++ * and be declared in vkd3d_shader_private.h and used without relying on HLSL ++ * IR structs. 
*/ ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { +- struct vkd3d_bytecode_buffer buffer = {0}; ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ struct d3dbc_compiler d3dbc = {0}; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; ++ ++ d3dbc.ctx = ctx; ++ d3dbc.program = program; ++ d3dbc.message_context = message_context; + +- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); ++ put_u32(buffer, sm1_version(version->type, version->major, version->minor)); + +- write_sm1_uniforms(ctx, &buffer, entry_func); ++ bytecode_put_bytes(buffer, ctab->code, ctab->size); + +- write_sm1_constant_defs(ctx, &buffer); +- write_sm1_semantic_dcls(ctx, &buffer); +- write_sm1_sampler_dcls(ctx, &buffer); +- write_sm1_block(ctx, &buffer, &entry_func->body); ++ d3dbc_write_constant_defs(&d3dbc); ++ d3dbc_write_semantic_dcls(&d3dbc); ++ d3dbc_write_sampler_dcls(&d3dbc); ++ d3dbc_write_block(&d3dbc, &entry_func->body); + +- put_u32(&buffer, D3DSIO_END); ++ put_u32(buffer, D3DSIO_END); + +- if (buffer.status) +- ctx->result = buffer.status; ++ if (buffer->status) ++ ctx->result = buffer->status; + + if (!ctx->result) + { +- out->code = buffer.data; +- out->size = buffer.size; ++ out->code = buffer->data; ++ out->size = buffer->size; + } + else + { +- vkd3d_free(buffer.data); ++ vkd3d_free(buffer->data); + } + return ctx->result; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 75f10a18253..0857ebb65d5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -56,6 +56,70 @@ static void string_storage_destroy(struct rb_entry *entry, void 
*context) + vkd3d_free(string_entry); + } + ++struct state_block_function_info ++{ ++ const char *name; ++ unsigned int min_args, max_args; ++}; ++ ++static const struct state_block_function_info *get_state_block_function_info(const char *name) ++{ ++ static const struct state_block_function_info valid_functions[] = ++ { ++ {"SetBlendState", 3, 3}, ++ {"SetDepthStencilState", 2, 2}, ++ {"SetRasterizerState", 1, 1}, ++ {"SetVertexShader", 1, 1}, ++ {"SetDomainShader", 1, 1}, ++ {"SetHullShader", 1, 1}, ++ {"SetGeometryShader", 1, 1}, ++ {"SetPixelShader", 1, 1}, ++ {"SetComputeShader", 1, 1}, ++ {"OMSetRenderTargets", 2, 9}, ++ }; ++ ++ for (unsigned int i = 0; i < ARRAY_SIZE(valid_functions); ++i) ++ { ++ if (!strcmp(name, valid_functions[i].name)) ++ return &valid_functions[i]; ++ } ++ return NULL; ++} ++ ++bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, ++ const struct vkd3d_shader_location *loc) ++{ ++ if (entry->is_function_call) ++ { ++ const struct state_block_function_info *info = get_state_block_function_info(entry->name); ++ ++ if (!info) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid state block function '%s'.", entry->name); ++ return false; ++ } ++ if (entry->args_count < info->min_args || entry->args_count > info->max_args) ++ { ++ if (info->min_args == info->max_args) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid argument count for state block function '%s' (expected %u).", ++ entry->name, info->min_args); ++ } ++ else ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid argument count for state block function '%s' (expected from %u to %u).", ++ entry->name, info->min_args, info->max_args); ++ } ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + struct fx_write_context; + + struct fx_write_context_ops +@@ -1290,6 +1354,9 @@ static bool state_block_contains_state(const 
char *name, unsigned int start, str + + for (i = start; i < block->count; ++i) + { ++ if (block->entries[i]->is_function_call) ++ continue; ++ + if (!ascii_strcasecmp(block->entries[i]->name, name)) + return true; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 3e0d55a7f7d..a79d2b064cf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -507,22 +507,31 @@ struct hlsl_ir_var + * name[lhs_index] = args[0] + * - or - + * name[lhs_index] = {args[0], args[1], ...}; ++ * ++ * This struct also represents function call syntax: ++ * name(args[0], args[1], ...) + */ + struct hlsl_state_block_entry + { +- /* For assignments, the name in the lhs. */ ++ /* Whether this entry is a function call. */ ++ bool is_function_call; ++ ++ /* For assignments, the name in the lhs. ++ * For functions, the name of the function. */ + char *name; + /* Resolved format-specific property identifier. */ + unsigned int name_id; + +- /* Whether the lhs in the assignment is indexed and, in that case, its index. */ ++ /* For assignments, whether the lhs of an assignment is indexed and, in ++ * that case, its index. */ + bool lhs_has_index; + unsigned int lhs_index; + +- /* Instructions present in the rhs. */ ++ /* Instructions present in the rhs or the function arguments. */ + struct hlsl_block *instrs; + +- /* For assignments, arguments of the rhs initializer. */ ++ /* For assignments, arguments of the rhs initializer. ++ * For function calls, the arguments themselves. 
*/ + struct hlsl_src *args; + unsigned int args_count; + }; +@@ -1284,6 +1293,9 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); + void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); + ++bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, ++ const struct vkd3d_shader_location *loc); ++ + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); +@@ -1469,10 +1481,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); + D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); ++bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); ++bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); ++ ++void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code 
*out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + + bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, + const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index a02692399f7..ed6b41bf403 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -3614,6 +3614,34 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); + } + ++static bool intrinsic_faceforward(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s faceforward(%s n, %s i, %s ng)\n" ++ "{\n" ++ " return dot(i, ng) < 0 ? n : -n;\n" ++ "}\n"; ++ ++ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) ++ return false; ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name, type->name))) ++ return false; ++ func = hlsl_compile_internal_function(ctx, "faceforward", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ + static bool intrinsic_floor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4687,6 +4715,7 @@ intrinsic_functions[] = + {"dot", 2, true, intrinsic_dot}, + {"exp", 1, true, intrinsic_exp}, + {"exp2", 1, true, intrinsic_exp2}, ++ {"faceforward", 3, true, intrinsic_faceforward}, + {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, + {"frac", 1, true, intrinsic_frac}, +@@ -6856,6 +6885,8 @@ parameter: + } + type = 
hlsl_new_array_type(ctx, type, $4.sizes[i]); + } ++ vkd3d_free($4.sizes); ++ + $$.type = type; + + if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) +@@ -7377,6 +7408,34 @@ state_block: + hlsl_src_from_node(&entry->args[i], $5.args[i]); + vkd3d_free($5.args); + ++ $$ = $1; ++ state_block_add_entry($$, entry); ++ } ++ | state_block any_identifier '(' func_arguments ')' ';' ++ { ++ struct hlsl_state_block_entry *entry; ++ unsigned int i; ++ ++ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) ++ YYABORT; ++ ++ entry->is_function_call = true; ++ ++ entry->name = $2; ++ entry->lhs_has_index = false; ++ entry->lhs_index = 0; ++ ++ entry->instrs = $4.instrs; ++ ++ entry->args_count = $4.args_count; ++ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) ++ YYABORT; ++ for (i = 0; i < entry->args_count; ++i) ++ hlsl_src_from_node(&entry->args[i], $4.args[i]); ++ vkd3d_free($4.args); ++ ++ hlsl_validate_state_block_entry(ctx, entry, &@4); ++ + $$ = $1; + state_block_add_entry($$, entry); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 33845b0d4bf..f9f5c8ed58a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -4716,7 +4716,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if (ctx->profile->major_version < 4) + { +- D3DSHADER_PARAM_REGISTER_TYPE sm1_type; ++ struct vkd3d_shader_version version; + D3DDECLUSAGE usage; + uint32_t usage_idx; + +@@ -4724,8 +4724,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + return; + +- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, &reg); +- if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) ++
version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ builtin = hlsl_sm1_register_from_semantic(&version, ++ var->semantic.name, var->semantic.index, output, &type, &reg); ++ if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -4734,7 +4738,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; +- type = (enum vkd3d_shader_register_type)sm1_type; + } + else + { +@@ -5524,6 +5527,114 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + } while (progress); + } + ++static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, ++ struct vsir_program *program, bool output, struct hlsl_ir_var *var) ++{ ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ enum vkd3d_shader_register_type type; ++ struct shader_signature *signature; ++ struct signature_element *element; ++ unsigned int register_index, mask; ++ ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ ++ if (output) ++ signature = &program->output_signature; ++ else ++ signature = &program->input_signature; ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element = &signature->elements[signature->element_count++]; ++ ++ if (!hlsl_sm1_register_from_semantic(&program->shader_version, ++ var->semantic.name, var->semantic.index, output, &type, &register_index)) ++ { ++ unsigned int usage, usage_idx; ++ bool ret; ++ ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ ++ ret =
hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); ++ assert(ret); ++ /* With the exception of vertex POSITION output, none of these are ++ * system values. Pixel POSITION input is not equivalent to ++ * SV_Position; the closer equivalent is VPOS, which is not declared ++ * as a semantic. */ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ } ++ mask = (1 << var->data_type->dimx) - 1; ++ ++ memset(element, 0, sizeof(*element)); ++ if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) ++ { ++ --signature->element_count; ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element->semantic_index = var->semantic.index; ++ element->sysval_semantic = sysval; ++ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->register_index = register_index; ++ element->target_location = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = mask; ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) ++ element->interpolation_mode = VKD3DSIM_LINEAR; ++} ++ ++static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_input_semantic) ++ sm1_generate_vsir_signature_entry(ctx, program, false, var); ++ if (var->is_output_semantic) ++ sm1_generate_vsir_signature_entry(ctx, program, true, var); ++ } ++} ++ ++/* OBJECTIVE: Translate all the information from ctx and entry_func to the ++ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() ++ * without relying on ctx and entry_func. 
*/ ++static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) ++{ ++ struct vkd3d_shader_version version = {0}; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ if (!vsir_program_init(program, &version, 0)) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ write_sm1_uniforms(ctx, &buffer); ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ return; ++ } ++ ctab->code = buffer.data; ++ ctab->size = buffer.size; ++ ++ sm1_generate_vsir_signature(ctx, program); ++} ++ + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) + { +@@ -5706,7 +5817,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + switch (target_type) + { + case VKD3D_SHADER_TARGET_D3D_BYTECODE: +- return hlsl_sm1_write(ctx, entry_func, out); ++ { ++ uint32_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vkd3d_shader_code ctab = {0}; ++ struct vsir_program program; ++ int result; ++ ++ sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); ++ if (ctx->result) ++ { ++ vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&ctab); ++ return ctx->result; ++ } ++ ++ result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); ++ vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&ctab); ++ return result; ++ } + + case VKD3D_SHADER_TARGET_DXBC_TPF: + return hlsl_sm4_write(ctx, entry_func, out); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 37e3d31c995..c79f845b675 100644 +--- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -151,6 +151,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, + VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, ++ VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch new file mode 100644 index 00000000..2376e6de --- /dev/null +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch @@ -0,0 +1,960 @@ +From e940aca803c12bbd55ebe3fb26920373a56a0fab Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 11 Jul 2024 09:52:56 +1000 +Subject: [PATCH] Updated vkd3d to c792114a6a58c7c97abf827d154d7ecd22d81536. 
+ +--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 4 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 13 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 12 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 197 +++++---- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 390 +++++++++++++++--- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 29 ++ + .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + + libs/vkd3d/libs/vkd3d/resource.c | 6 +- + 9 files changed, 510 insertions(+), 144 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index ea9fe77532d..4522d56c5c9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -2670,11 +2670,11 @@ static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct + case HLSL_RESOURCE_SAMPLE_GRAD: + sm1_instr.opcode = D3DSIO_TEXLDD; + +- sm1_instr.srcs[2].type = D3DSPR_TEMP; ++ sm1_instr.srcs[2].type = VKD3DSPR_TEMP; + sm1_instr.srcs[2].reg = ddx->reg.id; + sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); + +- sm1_instr.srcs[3].type = D3DSPR_TEMP; ++ sm1_instr.srcs[3].type = VKD3DSPR_TEMP; + sm1_instr.srcs[3].reg = ddy->reg.id; + sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 0857ebb65d5..bd2ad1290cd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -830,7 +830,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + } + + name_offset = write_string(name, fx); +- semantic_offset = write_string(semantic->raw_name, fx); ++ semantic_offset = semantic->raw_name ? 
write_string(semantic->raw_name, fx) : 0; + + offset = put_u32(buffer, hlsl_sm1_base_type(type)); + put_u32(buffer, hlsl_sm1_class(type)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index a157590c97a..acf50869a40 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -1810,7 +1810,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type + } + + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, const struct vkd3d_shader_location *loc) ++ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, ++ unsigned int unroll_limit, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_loop *loop; + +@@ -1819,6 +1820,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); ++ ++ loop->unroll_type = unroll_type; ++ loop->unroll_limit = unroll_limit; + return &loop->node; + } + +@@ -1881,9 +1885,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct + return map->instrs[i].dst; + } + +- /* The block passed to hlsl_clone_block() should have been free of external +- * references. 
*/ +- vkd3d_unreachable(); ++ return src; + } + + static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, +@@ -1980,7 +1982,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ + if (!clone_block(ctx, &body, &src->body, map)) + return NULL; + +- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) ++ if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + { + hlsl_block_cleanup(&body); + return NULL; +@@ -2791,6 +2793,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + static const char *const op_names[] = + { + [HLSL_OP0_VOID] = "void", ++ [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", + + [HLSL_OP1_ABS] = "abs", + [HLSL_OP1_BIT_NOT] = "~", +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index a79d2b064cf..5832958712a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -602,12 +602,21 @@ struct hlsl_ir_if + struct hlsl_block else_block; + }; + ++enum hlsl_ir_loop_unroll_type ++{ ++ HLSL_IR_LOOP_UNROLL, ++ HLSL_IR_LOOP_FORCE_UNROLL, ++ HLSL_IR_LOOP_FORCE_LOOP ++}; ++ + struct hlsl_ir_loop + { + struct hlsl_ir_node node; + /* loop condition is stored in the body (as "if (!condition) break;") */ + struct hlsl_block body; + unsigned int next_index; /* liveness index of the end of the loop */ ++ unsigned int unroll_limit; ++ enum hlsl_ir_loop_unroll_type unroll_type; + }; + + struct hlsl_ir_switch_case +@@ -629,6 +638,7 @@ struct hlsl_ir_switch + enum hlsl_ir_expr_op + { + HLSL_OP0_VOID, ++ HLSL_OP0_RASTERIZER_SAMPLE_COUNT, + + HLSL_OP1_ABS, + HLSL_OP1_BIT_NOT, +@@ -1390,7 +1400,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); + struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx 
*ctx, +- struct hlsl_block *block, const struct vkd3d_shader_location *loc); ++ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, + const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index ed6b41bf403..7b058a65bc1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -573,12 +573,91 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); + } + ++static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) ++{ ++ union hlsl_constant_value_component ret = {0}; ++ struct hlsl_ir_constant *constant; ++ struct hlsl_ir_node *node; ++ struct hlsl_block expr; ++ struct hlsl_src src; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (node->type) ++ { ++ case HLSL_IR_CONSTANT: ++ case HLSL_IR_EXPR: ++ case HLSL_IR_SWIZZLE: ++ case HLSL_IR_LOAD: ++ case HLSL_IR_INDEX: ++ continue; ++ case HLSL_IR_STORE: ++ if (hlsl_ir_store(node)->lhs.var->is_synthetic) ++ break; ++ /* fall-through */ ++ case HLSL_IR_CALL: ++ case HLSL_IR_IF: ++ case HLSL_IR_LOOP: ++ case HLSL_IR_JUMP: ++ case HLSL_IR_RESOURCE_LOAD: ++ case HLSL_IR_RESOURCE_STORE: ++ case HLSL_IR_SWITCH: ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Expected literal expression."); ++ break; ++ } ++ } ++ ++ if 
(!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) ++ return ret; ++ hlsl_block_add_block(&expr, block); ++ ++ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) ++ { ++ hlsl_block_cleanup(&expr); ++ return ret; ++ } ++ ++ /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ ++ hlsl_src_from_node(&src, node_from_block(&expr)); ++ hlsl_run_const_passes(ctx, &expr); ++ node = src.node; ++ hlsl_src_remove(&src); ++ ++ if (node->type == HLSL_IR_CONSTANT) ++ { ++ constant = hlsl_ir_constant(node); ++ ret = constant->value.u[0]; ++ } ++ else ++ { ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Failed to evaluate constant expression."); ++ } ++ ++ hlsl_block_cleanup(&expr); ++ ++ return ret; ++} ++ ++static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct vkd3d_shader_location *loc) ++{ ++ union hlsl_constant_value_component res; ++ ++ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); ++ return res.u; ++} ++ + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; ++ unsigned int i, unroll_limit = 0; + struct hlsl_ir_node *loop; +- unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); +@@ -591,18 +670,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct hlsl_attribute *attr = attributes->attrs[i]; + if (!strcmp(attr->name, "unroll")) + { +- if (attr->args_count) ++ if (attr->args_count > 1) + { +- hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); ++ hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, ++ "Ignoring 'unroll' attribute with more than 1 argument."); ++ continue; + } +- else ++ ++ if (attr->args_count == 1) + { +- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); ++ struct hlsl_block expr; ++ hlsl_block_init(&expr); ++ if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) ++ return NULL; ++ ++ unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); ++ hlsl_block_cleanup(&expr); + } ++ ++ unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; + } + else if (!strcmp(attr->name, "loop")) + { +- /* TODO: this attribute will be used to disable unrolling, once it's implememented. */ ++ unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + } + else if (!strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) +@@ -631,7 +721,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + else + list_move_head(&body->instrs, &cond->instrs); + +- if (!(loop = hlsl_new_loop(ctx, body, loc))) ++ if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) + goto oom; + hlsl_block_add_instr(init, loop); + +@@ -1320,84 +1410,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * + return block; + } + +-static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, +- struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) +-{ +- union hlsl_constant_value_component ret = {0}; +- struct hlsl_ir_constant *constant; +- struct hlsl_ir_node *node; +- struct hlsl_block expr; +- struct hlsl_src src; +- +- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) +- { +- switch (node->type) +- { +- case HLSL_IR_CONSTANT: +- case HLSL_IR_EXPR: +- case HLSL_IR_SWIZZLE: +- case HLSL_IR_LOAD: +- case HLSL_IR_INDEX: +- 
continue; +- case HLSL_IR_STORE: +- if (hlsl_ir_store(node)->lhs.var->is_synthetic) +- break; +- /* fall-through */ +- case HLSL_IR_CALL: +- case HLSL_IR_IF: +- case HLSL_IR_LOOP: +- case HLSL_IR_JUMP: +- case HLSL_IR_RESOURCE_LOAD: +- case HLSL_IR_RESOURCE_STORE: +- case HLSL_IR_SWITCH: +- case HLSL_IR_STATEBLOCK_CONSTANT: +- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Expected literal expression."); +- break; +- } +- } +- +- if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) +- return ret; +- hlsl_block_add_block(&expr, block); +- +- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) +- { +- hlsl_block_cleanup(&expr); +- return ret; +- } +- +- /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ +- hlsl_src_from_node(&src, node_from_block(&expr)); +- hlsl_run_const_passes(ctx, &expr); +- node = src.node; +- hlsl_src_remove(&src); +- +- if (node->type == HLSL_IR_CONSTANT) +- { +- constant = hlsl_ir_constant(node); +- ret = constant->value.u[0]; +- } +- else +- { +- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Failed to evaluate constant expression."); +- } +- +- hlsl_block_cleanup(&expr); +- +- return ret; +-} +- +-static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, +- const struct vkd3d_shader_location *loc) +-{ +- union hlsl_constant_value_component res; +- +- res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); +- return res.u; +-} +- + static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) + { + /* Scalar vars can be converted to pretty much everything */ +@@ -4676,6 +4688,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + return true; + } + ++static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location 
*loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *expr; ++ ++ if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, ++ operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, expr); ++ ++ return true; ++} ++ + static const struct intrinsic_function + { + const char *name; +@@ -4688,6 +4714,7 @@ intrinsic_functions[] = + { + /* Note: these entries should be kept in alphabetical order. */ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, ++ {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, + {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, + {"all", 1, true, intrinsic_all}, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index f9f5c8ed58a..7e4f168675e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -1981,6 +1981,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return progress; + } + ++enum validation_result ++{ ++ DEREF_VALIDATION_OK, ++ DEREF_VALIDATION_OUT_OF_BOUNDS, ++ DEREF_VALIDATION_NOT_CONSTANT, ++}; ++ ++static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, ++ const struct hlsl_deref *deref) ++{ ++ struct hlsl_type *type = deref->var->data_type; ++ unsigned int i; ++ ++ for (i = 0; i < deref->path_len; ++i) ++ { ++ struct hlsl_ir_node *path_node = deref->path[i].node; ++ unsigned int idx = 0; ++ ++ assert(path_node); ++ if (path_node->type != HLSL_IR_CONSTANT) ++ return DEREF_VALIDATION_NOT_CONSTANT; ++ ++ /* We should always have generated a cast to UINT. 
*/ ++ assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_VECTOR: ++ if (idx >= type->dimx) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Vector index is out of bounds. %u/%u", idx, type->dimx); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_MATRIX: ++ if (idx >= hlsl_type_major_size(type)) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); ++ } ++ ++ return DEREF_VALIDATION_OK; ++} ++ + static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const char *usage) + { +@@ -1998,60 +2068,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct + } + } + +-static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + void *context) + { +- unsigned int start, count; +- +- if (instr->type == HLSL_IR_RESOURCE_LOAD) ++ switch (instr->type) + { +- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); +- +- if (!load->resource.var->is_uniform) ++ case HLSL_IR_RESOURCE_LOAD: + { +- hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Loaded resource must have a single uniform source."); ++ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); ++ ++ if (!load->resource.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Loaded resource must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Loaded resource from \"%s\" must be determinable at compile time.", ++ load->resource.var->name); ++ note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); ++ } ++ ++ if (load->sampler.var) ++ { ++ if (!load->sampler.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Resource load sampler must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Resource load sampler from \"%s\" must be determinable at compile time.", ++ load->sampler.var->name); ++ note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); ++ } ++ } ++ break; + } +- else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) ++ case HLSL_IR_RESOURCE_STORE: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Loaded resource from \"%s\" must be determinable at compile time.", +- load->resource.var->name); +- note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); +- } ++ struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + +- if (load->sampler.var) +- { +- if (!load->sampler.var->is_uniform) ++ if (!store->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Resource load sampler must have a single uniform source."); ++ "Accessed resource must have a single uniform source."); + } +- else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) ++ else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Resource load sampler from \"%s\" must be determinable at compile time.", +- load->sampler.var->name); +- note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); ++ "Accessed resource from \"%s\" must be determinable at compile time.", ++ store->resource.var->name); ++ note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); + } ++ break; + } +- } +- else if (instr->type == HLSL_IR_RESOURCE_STORE) +- { +- struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); +- +- if (!store->resource.var->is_uniform) ++ case HLSL_IR_LOAD: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Accessed resource must have a single uniform source."); ++ struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ validate_component_index_range_from_deref(ctx, &load->src); ++ break; + } +- else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) ++ case HLSL_IR_STORE: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Accessed resource from \"%s\" must be determinable at compile time.", +- store->resource.var->name); +- note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); ++ struct hlsl_ir_store *store = hlsl_ir_store(instr); ++ validate_component_index_range_from_deref(ctx, &store->lhs); ++ break; + } ++ default: ++ break; + } + + return false; +@@ -5203,21 +5290,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + { + 
case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Vector index is out of bounds. %u/%u", idx, type->dimx); + return false; +- } + *start += idx; + break; + + case HLSL_CLASS_MATRIX: + if (idx >= hlsl_type_major_size(type)) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); + return false; +- } + if (hlsl_type_is_row_major(type)) + *start += idx * type->dimx; + else +@@ -5226,11 +5305,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); + return false; +- } + *start += idx * hlsl_type_component_count(type->e.array.type); + break; + +@@ -5635,6 +5710,222 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + sm1_generate_vsir_signature(ctx, program); + } + ++static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, ++ struct hlsl_block **found_block) ++{ ++ struct hlsl_ir_node *node; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (node == stop_point) ++ return NULL; ++ ++ if (node->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(node); ++ struct hlsl_ir_jump *jump = NULL; ++ ++ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) ++ return jump; ++ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) ++ return jump; ++ } ++ else if (node->type == HLSL_IR_JUMP) ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ ++ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) ++ { ++ 
*found_block = block; ++ return jump; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) ++{ ++ /* Always use the explicit limit if it has been passed. */ ++ if (loop->unroll_limit) ++ return loop->unroll_limit; ++ ++ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ return 1024; ++ ++ /* SM4 limits implicit unrolling to 254 iterations. */ ++ if (hlsl_version_ge(ctx, 4, 0)) ++ return 254; ++ ++ /* SM<3 implicitly unrolls up to 1024 iterations. */ ++ return 1024; ++} ++ ++static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) ++{ ++ unsigned int max_iterations, i; ++ ++ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); ++ ++ for (i = 0; i < max_iterations; ++i) ++ { ++ struct hlsl_block tmp_dst, *jump_block; ++ struct hlsl_ir_jump *jump = NULL; ++ ++ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) ++ return false; ++ list_move_before(&loop->node.entry, &tmp_dst.instrs); ++ hlsl_block_cleanup(&tmp_dst); ++ ++ hlsl_run_const_passes(ctx, block); ++ ++ if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) ++ { ++ enum hlsl_ir_jump_type type = jump->type; ++ ++ if (jump_block != loop_parent) ++ { ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, ++ "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); ++ return false; ++ } ++ ++ list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); ++ hlsl_block_cleanup(&tmp_dst); ++ ++ if (type == HLSL_IR_JUMP_BREAK) ++ break; ++ } ++ } ++ ++ /* Native will not emit an error if max_iterations has been reached with an ++ * explicit 
limit. It also will not insert a loop if there are iterations left ++ * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ ++ if (!loop->unroll_limit && i == max_iterations) ++ { ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, ++ "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); ++ return false; ++ } ++ ++ list_remove(&loop->node.entry); ++ hlsl_free_instr(&loop->node); ++ ++ return true; ++} ++ ++/* ++ * loop_unrolling_find_unrollable_loop() is not the normal way to do things; ++ * normal passes simply iterate over the whole block and apply a transformation ++ * to every relevant instruction. However, loop unrolling can fail, and we want ++ * to leave the loop in its previous state in that case. That isn't a problem by ++ * itself, except that loop unrolling needs copy-prop in order to work properly, ++ * and copy-prop state at the time of the loop depends on the rest of the program ++ * up to that point. This means we need to clone the whole program, and at that ++ * point we have to search it again anyway to find the clone of the loop we were ++ * going to unroll. ++ * ++ * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop ++ * up until the loop instruction, clone just that loop, then use copyprop again ++ * with the saved state after unrolling. However, copyprop currently isn't built ++ * for that yet [notably, it still relies on indices]. Note also this still doesn't ++ * really let us use transform_ir() anyway [since we don't have a good way to say ++ * "copyprop from the beginning of the program up to the instruction we're ++ * currently processing" from the callback]; we'd have to use a dedicated ++ * recursive function instead. 
*/ ++static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_block **containing_block) ++{ ++ struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (instr->type) ++ { ++ case HLSL_IR_LOOP: ++ { ++ struct hlsl_ir_loop *nested_loop; ++ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); ++ ++ if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) ++ return nested_loop; ++ ++ if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ { ++ *containing_block = block; ++ return loop; ++ } ++ ++ break; ++ } ++ case HLSL_IR_IF: ++ { ++ struct hlsl_ir_loop *loop; ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) ++ return loop; ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) ++ return loop; ++ ++ break; ++ } ++ case HLSL_IR_SWITCH: ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ struct hlsl_ir_loop *loop; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ { ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) ++ return loop; ++ } ++ ++ break; ++ } ++ default: ++ break; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ while (true) ++ { ++ struct hlsl_block clone, *containing_block; ++ struct hlsl_ir_loop *loop, *cloned_loop; ++ ++ if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) ++ return; ++ ++ if (!hlsl_clone_block(ctx, &clone, block)) ++ return; ++ ++ cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); ++ assert(cloned_loop); ++ ++ if (!loop_unrolling_unroll_loop(ctx, &clone, 
containing_block, cloned_loop)) ++ { ++ hlsl_block_cleanup(&clone); ++ loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; ++ continue; ++ } ++ ++ hlsl_block_cleanup(block); ++ hlsl_block_init(block); ++ hlsl_block_add_block(block, &clone); ++ } ++} ++ + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) + { +@@ -5721,6 +6012,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } + ++ transform_unroll_loops(ctx, body); + hlsl_run_const_passes(ctx, body); + + remove_unreachable_code(ctx, body); +@@ -5730,7 +6022,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + +- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, validate_dereferences, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index ca7cdfd5217..a7c37215e5e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -2762,6 +2762,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, ++ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) +@@ -2817,6 +2818,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + {"depth", true, 
VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, ++ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, +@@ -5042,6 +5044,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + ++static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) ++{ ++ struct sm4_instruction instr; ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; ++ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; ++ instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ + static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + const struct hlsl_ir_node *arg1 = expr->operands[0].node; +@@ -5057,6 +5078,14 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + + switch (expr->op) + { ++ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: ++ if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) ++ write_sm4_rasterizer_sample_count(tpf, &expr->node); ++ else ++ hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); ++ break; ++ + case HLSL_OP1_ABS: + switch (dst_type->e.numeric.type) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index c79f845b675..96e613669a6 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -152,6 +152,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, + VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, ++ VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index db2d87428bb..7d7f40c0953 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -4349,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript + return hr; + + descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); +- d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); ++ if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) ++ { ++ vkd3d_private_store_destroy(&descriptor_heap->private_store); ++ return hr; ++ } + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + d3d12_device_add_ref(descriptor_heap->device = device); +-- +2.43.0 +