From 3738f9baee74c66eefdbf51b59b8567a6ff0c898 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 18 Dec 2024 06:42:56 +1100 Subject: [PATCH] Updated vkd3d-latest patchset Squashed to release + update. --- ...-9619582d1b6a54720e17a148a72b446fda2.patch | 4878 ----- ...-a68fd1b0ded735580b0ec9025f75fe02d62.patch | 14861 ++++++++++++++++ ...-39cbef9e018ee760ffd175fdd6729e47052.patch | 1105 -- ...-36fda8e28ca31517ae051b2e46b00d71a23.patch | 1803 -- ...-01117c716dea0e934ac594a7596d90ad948.patch | 302 - ...-65b67e84a8ec23d4532166cebed86095414.patch | 4053 ----- ...-5827197246214a3b1a362f19a0ac4de426e.patch | 1915 -- 7 files changed, 14861 insertions(+), 14056 deletions(-) delete mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch create mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch delete mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-36fda8e28ca31517ae051b2e46b00d71a23.patch delete mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-01117c716dea0e934ac594a7596d90ad948.patch delete mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-65b67e84a8ec23d4532166cebed86095414.patch delete mode 100644 patches/vkd3d-latest/0006-Updated-vkd3d-to-5827197246214a3b1a362f19a0ac4de426e.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch deleted file mode 100644 index befe0142..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch +++ /dev/null @@ -1,4878 +0,0 @@ -From 15951174c1034c55fafecf217b0b8bbafe414f32 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 29 Nov 2024 07:14:57 +1100 -Subject: [PATCH] Updated vkd3d to 9619582d1b6a54720e17a148a72b446fda2fd41f. 
- ---- - libs/vkd3d/include/private/vkd3d_common.h | 2 +- - libs/vkd3d/include/vkd3d_shader.h | 25 + - libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 + - libs/vkd3d/libs/vkd3d-shader/dxil.c | 29 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 151 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 69 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 38 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 26 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1164 ++++++++++++++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 519 +++++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 237 +++- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 80 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 1090 +-------------- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 20 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - 18 files changed, 2038 insertions(+), 1420 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index ec1dd70c9b2..fd62730f948 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index f95caa2f825..cb561d7f079 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -112,6 +112,11 @@ enum vkd3d_shader_structure_type - * \since 1.13 - */ - VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, -+ /** -+ * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. 
-+ * \since 1.15 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -2040,6 +2045,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info - unsigned int combined_sampler_count; - }; - -+/** -+ * A chained structure describing the tessellation information in a hull shader. -+ * -+ * This structure extends vkd3d_shader_compile_info. -+ * -+ * \since 1.15 -+ */ -+struct vkd3d_shader_scan_hull_shader_tessellation_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** The tessellation output primitive. */ -+ enum vkd3d_shader_tessellator_output_primitive output_primitive; -+ /** The tessellation partitioning mode. */ -+ enum vkd3d_shader_tessellator_partitioning partitioning; -+}; -+ - /** - * Data type of a shader varying, returned as part of struct - * vkd3d_shader_signature_element. 
-diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index f60ef7db769..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -20,6 +20,7 @@ - #define WIDL_C_INLINE_WRAPPERS - #endif - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include "vkd3d_blob.h" -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 9e2eacbcfa6..bda9bc72f56 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1565,6 +1565,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -1671,6 +1672,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 7099bcc9ce2..71f3c7f17b0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -3824,7 +3824,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par - } - - static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( -- enum vkd3d_shader_sysval_semantic sysval_semantic) -+ enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input) - { - switch (sysval_semantic) - { -@@ -3834,7 +3834,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - case VKD3D_SHADER_SV_SAMPLE_INDEX: - return VKD3DSPR_NULL; - case VKD3D_SHADER_SV_COVERAGE: -- return VKD3DSPR_COVERAGE; -+ return is_input ? 
VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK; - case VKD3D_SHADER_SV_DEPTH: - return VKD3DSPR_DEPTHOUT; - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: -@@ -3884,7 +3884,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - param = &params[i]; - - if (e->register_index == UINT_MAX -- && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) -+ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL) - { - dst_param_io_init(param, e, io_reg_type); - continue; -@@ -9348,7 +9348,7 @@ static void signature_element_read_additional_element_values(struct signature_el - } - - static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -- struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) -+ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input) - { - unsigned int i, j, column_count, operand_count, index; - const struct sm6_metadata_node *node, *element_node; -@@ -9466,7 +9466,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const - - if ((is_register = e->register_index == UINT_MAX)) - { -- if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) -+ if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID) - { - WARN("Unhandled I/O register semantic kind %u.\n", j); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -@@ -9578,17 +9578,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - } - - if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], -- &program->input_signature, tessellator_domain)) < 0) -+ &program->input_signature, tessellator_domain, true)) < 0) - { - return ret; - } - if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, 
m->u.node->operands[1], -- &program->output_signature, tessellator_domain)) < 0) -+ &program->output_signature, tessellator_domain, false)) < 0) - { - return ret; - } - if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], -- &program->patch_constant_signature, tessellator_domain)) < 0) -+ &program->patch_constant_signature, tessellator_domain, false)) < 0) - { - return ret; - } -@@ -9717,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, - - ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); - ins->declaration.tessellator_domain = tessellator_domain; -+ sm6->p.program->tess_domain = tessellator_domain; - } - --static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, -- const char *type) -+static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, -+ unsigned int count, bool allow_zero, const char *type) - { -- if (!count || count > 32) -+ if ((!count && !allow_zero) || count > 32) - { - WARN("%s control point count %u invalid.\n", type, count); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, -@@ -9951,7 +9952,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa - } - - sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); -- sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); -+ sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); - sm6->p.program->input_control_point_count = operands[1]; - - return operands[0]; -@@ -10010,9 +10011,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa - } - } - -- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); -+ sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); - program->input_control_point_count = operands[1]; -- 
sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); -+ sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); - sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); - program->output_control_point_count = operands[2]; - sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index e22177e1e30..064e15c4b60 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -36,6 +36,16 @@ struct fx_4_binary_type - uint32_t typeinfo; - }; - -+struct fx_5_shader -+{ -+ uint32_t offset; -+ uint32_t sodecl[4]; -+ uint32_t sodecl_count; -+ uint32_t rast_stream; -+ uint32_t iface_bindings_count; -+ uint32_t iface_bindings; -+}; -+ - struct string_entry - { - struct rb_entry entry; -@@ -550,6 +560,8 @@ enum fx_4_type_constants - FX_4_ASSIGNMENT_VARIABLE = 0x2, - FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, - FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, -+ FX_4_ASSIGNMENT_INLINE_SHADER = 0x7, -+ FX_5_ASSIGNMENT_INLINE_SHADER = 0x8, - }; - - static const uint32_t fx_4_numeric_base_types[] = -@@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_NULL: -+ case HLSL_CLASS_STREAM_OUTPUT: - vkd3d_unreachable(); - - case HLSL_CLASS_VOID: -@@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_NULL: -+ case HLSL_CLASS_STREAM_OUTPUT: - /* This cannot appear as an extern variable. 
*/ - break; - } -@@ -1834,6 +1848,7 @@ enum state_property_component_type - FX_BLEND, - FX_VERTEXSHADER, - FX_PIXELSHADER, -+ FX_GEOMETRYSHADER, - FX_COMPONENT_TYPE_COUNT, - }; - -@@ -2065,6 +2080,7 @@ fx_4_states[] = - - { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, - { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, -+ { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 }, - { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, - { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, - { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, -@@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en - - static int fx_2_parse(struct fx_parser *parser) - { -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented."); - - return -1; - } -@@ -3120,7 +3136,7 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) - else - { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -- "Only numeric and string types are supported in annotations.\n"); -+ "Only numeric and string types are supported in annotations."); - } - - if (type.element_count) -@@ -3210,27 +3226,13 @@ static void fx_parse_buffers(struct fx_parser *parser) - } - } - --static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) -+static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) - { - struct vkd3d_shader_compile_info info = { 0 }; - struct vkd3d_shader_code output; -- uint32_t data_size, offset; - const void *data = NULL; - const char *p, *q, *end; -- struct fx_5_shader -- { -- uint32_t offset; -- uint32_t sodecl[4]; 
-- uint32_t sodecl_count; -- uint32_t rast_stream; -- uint32_t iface_bindings_count; -- uint32_t iface_bindings; -- } shader5; -- struct fx_4_gs_so -- { -- uint32_t offset; -- uint32_t sodecl; -- } gs_so; -+ uint32_t data_size; - int ret; - - static const struct vkd3d_shader_compile_option options[] = -@@ -3238,35 +3240,9 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int - {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, - }; - -- switch (object_type) -- { -- case FX_4_OBJECT_TYPE_PIXEL_SHADER: -- case FX_4_OBJECT_TYPE_VERTEX_SHADER: -- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -- offset = fx_parser_read_u32(parser); -- break; -- -- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -- fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); -- offset = gs_so.offset; -- break; -- -- case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -- case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -- case FX_5_OBJECT_TYPE_HULL_SHADER: -- case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -- fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); -- offset = shader5.offset; -- break; -- -- default: -- parser->failed = true; -- return; -- } -- -- fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); -+ fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); - if (data_size) -- data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); -+ data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size); - - if (!data) - return; -@@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int - if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) - { - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -- "Failed to disassemble shader blob.\n"); -+ "Failed to disassemble shader blob."); - return; - } - parse_fx_print_indent(parser); -@@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int - - 
parse_fx_print_indent(parser); - vkd3d_string_buffer_printf(&parser->buffer, "}"); -- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) -+ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) - { - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", -- fx_4_get_string(parser, gs_so.sodecl)); -+ fx_4_get_string(parser, shader->sodecl[0])); - } - else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) - { -- for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) -+ for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i) - { -- if (shader5.sodecl[i]) -+ if (shader->sodecl[i]) - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", -- i, fx_4_get_string(parser, shader5.sodecl[i])); -+ i, fx_4_get_string(parser, shader->sodecl[i])); - } -- if (shader5.sodecl_count) -- vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); -+ if (shader->sodecl_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); - } - - vkd3d_shader_free_shader_code(&output); - } - -+static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) -+{ -+ struct fx_5_shader shader = { 0 }; -+ -+ switch (object_type) -+ { -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ shader.offset = fx_parser_read_u32(parser); -+ break; -+ -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ shader.offset = fx_parser_read_u32(parser); -+ shader.sodecl[0] = fx_parser_read_u32(parser); -+ break; -+ -+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -+ fx_parser_read_u32s(parser, &shader, sizeof(shader)); -+ break; -+ -+ default: -+ parser->failed = true; -+ return; -+ } -+ -+ 
fx_4_parse_shader_blob(parser, object_type, &shader); -+} -+ - static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) - { - switch (type->typeinfo) -@@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 - [FX_UINT8] = "byte", - }; - const struct rhs_named_value *named_value; -+ struct fx_5_shader shader = { 0 }; -+ unsigned int shader_type = 0; - uint32_t i, j, comp_count; - struct fx_4_state *state; - -@@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 - if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), - sizeof(*fx_4_states), fx_4_state_id_compare))) - { -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id); - break; - } - -@@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 - vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), - fx_4_get_string(parser, index.index)); - break; -+ case FX_4_ASSIGNMENT_INLINE_SHADER: -+ case FX_5_ASSIGNMENT_INLINE_SHADER: -+ { -+ bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER; -+ -+ if (shader5) -+ fx_parser_read_unstructured(parser, &shader, entry.value, sizeof(shader)); -+ else -+ fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t)); -+ -+ if (state->type == FX_PIXELSHADER) -+ shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER; -+ else if (state->type == FX_VERTEXSHADER) -+ shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER; -+ else if (state->type == FX_GEOMETRYSHADER) -+ shader_type = shader5 ? 
FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO; -+ else if (state->type == FX_HULLSHADER) -+ shader_type = FX_5_OBJECT_TYPE_HULL_SHADER; -+ else if (state->type == FX_DOMAINSHADER) -+ shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER; -+ else if (state->type == FX_COMPUTESHADER) -+ shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER; -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_start_indent(parser); -+ fx_4_parse_shader_blob(parser, shader_type, &shader); -+ parse_fx_end_indent(parser); -+ break; -+ } - default: - fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -- "Unsupported assignment type %u.\n", entry.type); -+ "Unsupported assignment type %u.", entry.type); - } - vkd3d_string_buffer_printf(&parser->buffer, ";\n"); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 96de18dc886..97c6c0a1377 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -287,6 +287,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_VOID: - case HLSL_CLASS_NULL: - return false; -@@ -434,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -525,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_STREAM_OUTPUT: - break; - } - vkd3d_unreachable(); -@@ -680,6 +683,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_CONSTANT_BUFFER: - case HLSL_CLASS_NULL: -+ case HLSL_CLASS_STREAM_OUTPUT: - 
vkd3d_unreachable(); - } - type = next_type; -@@ -898,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba - return type; - } - -+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, -+ enum hlsl_so_object_type so_type, struct hlsl_type *data_type) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) -+ return NULL; -+ type->class = HLSL_CLASS_STREAM_OUTPUT; -+ type->e.so.so_type = so_type; -+ type->e.so.type = data_type; -+ -+ list_add_tail(&ctx->types, &type->entry); -+ -+ return type; -+} -+ - struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, - struct hlsl_struct_field *fields, size_t field_count) - { -@@ -1086,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_STREAM_OUTPUT: - break; - } - -@@ -1157,6 +1178,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - case HLSL_CLASS_CONSTANT_BUFFER: - return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); - -+ case HLSL_CLASS_STREAM_OUTPUT: -+ if (t1->e.so.so_type != t2->e.so.so_type) -+ return false; -+ return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); -+ - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -@@ -1695,22 +1721,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return &s->node; - } - --struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, -- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_vsir_instruction_ref *vsir_instr; -- -- if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) -- return NULL; -- init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); -- 
vsir_instr->vsir_instr_idx = vsir_instr_idx; -- -- if (reg) -- vsir_instr->node.reg = *reg; -- -- return &vsir_instr->node; --} -- - struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) - { -@@ -2533,9 +2543,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - - case HLSL_IR_STATEBLOCK_CONSTANT: - return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); -- -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- vkd3d_unreachable(); - } - - vkd3d_unreachable(); -@@ -2836,6 +2843,20 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - vkd3d_string_buffer_printf(string, ""); - return string; - -+ case HLSL_CLASS_STREAM_OUTPUT: -+ if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) -+ vkd3d_string_buffer_printf(string, "PointStream"); -+ else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) -+ vkd3d_string_buffer_printf(string, "LineStream"); -+ else -+ vkd3d_string_buffer_printf(string, "TriangleStream"); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } -+ return string; -+ - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: -@@ -2968,7 +2989,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", - [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", - [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", -- [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -3562,11 +3582,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - case HLSL_IR_STATEBLOCK_CONSTANT: - dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); - 
break; -- -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", -- hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); -- break; - } - } - -@@ -3875,10 +3890,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - case HLSL_IR_STATEBLOCK_CONSTANT: - free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); - break; -- -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); -- break; - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 075c76cb0e2..25d1b8df947 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -22,7 +22,6 @@ - - #include "vkd3d_shader_private.h" - #include "wine/rbtree.h" --#include "d3dcommon.h" - #include "d3dx9shader.h" - - /* The general IR structure is inspired by Mesa GLSL hir, even though the code -@@ -105,6 +104,7 @@ enum hlsl_type_class - HLSL_CLASS_GEOMETRY_SHADER, - HLSL_CLASS_CONSTANT_BUFFER, - HLSL_CLASS_BLEND_STATE, -+ HLSL_CLASS_STREAM_OUTPUT, - HLSL_CLASS_VOID, - HLSL_CLASS_NULL, - HLSL_CLASS_ERROR, -@@ -142,6 +142,13 @@ enum hlsl_sampler_dim - /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ - }; - -+enum hlsl_so_object_type -+{ -+ HLSL_STREAM_OUTPUT_POINT_STREAM, -+ HLSL_STREAM_OUTPUT_LINE_STREAM, -+ HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, -+}; -+ - enum hlsl_regset - { - HLSL_REGSET_SAMPLERS, -@@ -220,6 +227,12 @@ struct hlsl_type - } resource; - /* Additional field to distinguish object types. Currently used only for technique types. */ - unsigned int version; -+ /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */ -+ struct -+ { -+ struct hlsl_type *type; -+ enum hlsl_so_object_type so_type; -+ } so; - } e; - - /* Number of numeric register components used by one value of this type, for each regset. 
-@@ -330,8 +343,6 @@ enum hlsl_ir_node_type - HLSL_IR_COMPILE, - HLSL_IR_SAMPLER_STATE, - HLSL_IR_STATEBLOCK_CONSTANT, -- -- HLSL_IR_VSIR_INSTRUCTION_REF, - }; - - /* Common data for every type of IR instruction node. */ -@@ -934,16 +945,6 @@ struct hlsl_ir_stateblock_constant - char *name; - }; - --/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. -- * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ --struct hlsl_ir_vsir_instruction_ref --{ -- struct hlsl_ir_node node; -- -- /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ -- unsigned int vsir_instr_idx; --}; -- - struct hlsl_scope - { - /* Item entry for hlsl_ctx.scopes. */ -@@ -1259,12 +1260,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co - return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); - } - --static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) --{ -- VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); -- return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); --} -- - static inline void hlsl_block_init(struct hlsl_block *block) - { - list_init(&block->instrs); -@@ -1519,6 +1514,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond - struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); -+struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, -+ enum hlsl_so_object_type so_type, struct hlsl_type *type); - struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node 
*arg3); - -@@ -1588,9 +1585,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned - struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, - struct list *cases, const struct vkd3d_shader_location *loc); - --struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, -- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); -- - void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); - void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 8dace11916a..31fb30521e9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -104,6 +104,7 @@ if {return KW_IF; } - in {return KW_IN; } - inline {return KW_INLINE; } - inout {return KW_INOUT; } -+LineStream {return KW_LINESTREAM; } - linear {return KW_LINEAR; } - matrix {return KW_MATRIX; } - namespace {return KW_NAMESPACE; } -@@ -114,6 +115,7 @@ out {return KW_OUT; } - packoffset {return KW_PACKOFFSET; } - pass {return KW_PASS; } - PixelShader {return KW_PIXELSHADER; } -+PointStream {return KW_POINTSTREAM; } - pixelshader {return KW_PIXELSHADER; } - RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } - RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } -@@ -170,6 +172,7 @@ texture3D {return KW_TEXTURE3D; } - TextureCube {return KW_TEXTURECUBE; } - textureCUBE {return KW_TEXTURECUBE; } - TextureCubeArray {return KW_TEXTURECUBEARRAY; } -+TriangleStream {return KW_TRIANGLESTREAM; } - true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } - unsigned {return KW_UNSIGNED; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 
60aade732db..5bcd5e9034b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -678,8 +678,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - break; -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- vkd3d_unreachable(); - } - } - -@@ -6553,6 +6551,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct hlsl_semantic semantic; - enum hlsl_buffer_type buffer_type; - enum hlsl_sampler_dim sampler_dim; -+ enum hlsl_so_object_type so_type; - struct hlsl_attribute *attr; - struct parse_attribute_list attr_list; - struct hlsl_ir_switch_case *switch_case; -@@ -6596,6 +6595,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_INLINE - %token KW_INOUT - %token KW_LINEAR -+%token KW_LINESTREAM - %token KW_MATRIX - %token KW_NAMESPACE - %token KW_NOINTERPOLATION -@@ -6605,6 +6605,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_PACKOFFSET - %token KW_PASS - %token KW_PIXELSHADER -+%token KW_POINTSTREAM - %token KW_RASTERIZERORDEREDBUFFER - %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER - %token KW_RASTERIZERORDEREDTEXTURE1D -@@ -6654,6 +6655,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_TEXTURE3D - %token KW_TEXTURECUBE - %token KW_TEXTURECUBEARRAY -+%token KW_TRIANGLESTREAM - %token KW_TRUE - %token KW_TYPEDEF - %token KW_UNSIGNED -@@ -6784,6 +6786,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - - %type semantic - -+%type so_type -+ - %type state_block - - %type state_block_index_opt -@@ -7805,6 +7809,20 @@ rov_type: - $$ = HLSL_SAMPLER_DIM_3D; - } - -+so_type: -+ KW_POINTSTREAM -+ { -+ $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; -+ } -+ | KW_LINESTREAM -+ { -+ $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; -+ 
} -+ | KW_TRIANGLESTREAM -+ { -+ $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; -+ } -+ - resource_format: - var_modifiers type - { -@@ -7948,6 +7966,10 @@ type_no_void: - validate_uav_type(ctx, $1, $3, &@4); - $$ = hlsl_new_uav_type(ctx, $1, $3, true); - } -+ | so_type '<' type '>' -+ { -+ $$ = hlsl_new_stream_output_type(ctx, $1, $3); -+ } - | KW_RWBYTEADDRESSBUFFER - { - $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index d11ff481f6b..8f45628dbee 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -19,6 +19,7 @@ - */ - - #include "hlsl.h" -+#include "vkd3d_shader_private.h" - #include - #include - -@@ -1678,6 +1679,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - -@@ -4162,9 +4164,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_STATEBLOCK_CONSTANT: - /* Stateblock constants should not appear in the shader program. */ - vkd3d_unreachable(); -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ -- vkd3d_unreachable(); - } - - return false; -@@ -4304,9 +4303,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - case HLSL_IR_STATEBLOCK_CONSTANT: - /* Stateblock constants should not appear in the shader program. */ - vkd3d_unreachable(); -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. 
*/ -- vkd3d_unreachable(); - - case HLSL_IR_STORE: - { -@@ -4494,6 +4490,9 @@ struct register_allocator - - /* Two allocations with different mode can't share the same register. */ - int mode; -+ /* If an allocation is VIP, no new allocations can be made in the -+ * register unless they are VIP as well. */ -+ bool vip; - } *allocations; - size_t count, capacity; - -@@ -4513,7 +4512,7 @@ struct register_allocator - }; - - static unsigned int get_available_writemask(const struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) -+ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) - { - unsigned int writemask = VKD3DSP_WRITEMASK_ALL; - size_t i; -@@ -4532,6 +4531,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all - writemask &= ~allocation->writemask; - if (allocation->mode != mode) - writemask = 0; -+ if (allocation->vip && !vip) -+ writemask = 0; - } - - if (!writemask) -@@ -4542,7 +4543,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all - } - - static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, -- unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) -+ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) - { - struct allocation *allocation; - -@@ -4556,16 +4557,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a - allocation->first_write = first_write; - allocation->last_read = last_read; - allocation->mode = mode; -+ allocation->vip = vip; - - allocator->reg_count = max(allocator->reg_count, reg_idx + 1); - } - --/* reg_size is the number of register components to be reserved, while component_count is the number -- * of components for the register's writemask. 
In SM1, floats and vectors allocate the whole -- * register, even if they don't use it completely. */ -+/* Allocates a register (or some components of it) within the register allocator. -+ * 'reg_size' is the number of register components to be reserved. -+ * 'component_count' is the number of components for the hlsl_reg's -+ * writemask, which can be smaller than 'reg_size'. For instance, sm1 -+ * floats and vectors allocate the whole register even if they are not -+ * using all components. -+ * 'mode' can be provided to avoid allocating on a register that already has an -+ * allocation with a different mode. -+ * 'force_align' can be used so that the allocation always start in '.x'. -+ * 'vip' can be used so that no new allocations can be made in the given register -+ * unless they are 'vip' as well. */ - static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, -- unsigned int component_count, int mode, bool force_align) -+ unsigned int component_count, int mode, bool force_align, bool vip) - { - struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; - unsigned int required_size = force_align ? 
4 : reg_size; -@@ -4579,7 +4589,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) - { - unsigned int available_writemask = get_available_writemask(allocator, -- first_write, last_read, reg_idx, mode); -+ first_write, last_read, reg_idx, mode, vip); - - if (vkd3d_popcount(available_writemask) >= pref) - { -@@ -4589,7 +4599,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - ret.id = reg_idx; - ret.writemask = hlsl_combine_writemasks(writemask, - vkd3d_write_mask_from_component_count(component_count)); -- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); -+ -+ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); - return ret; - } - } -@@ -4598,13 +4609,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - ret.id = allocator->reg_count; - ret.writemask = vkd3d_write_mask_from_component_count(component_count); - record_allocation(ctx, allocator, allocator->reg_count, -- vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); -+ vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); - return ret; - } - - /* Allocate a register with writemask, while reserving reg_writemask. 
*/ --static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) -+static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, -+ struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, -+ uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) - { - struct hlsl_reg ret = {0}; - uint32_t reg_idx; -@@ -4614,11 +4626,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - for (reg_idx = 0;; ++reg_idx) - { - if ((get_available_writemask(allocator, first_write, last_read, -- reg_idx, mode) & reg_writemask) == reg_writemask) -+ reg_idx, mode, vip) & reg_writemask) == reg_writemask) - break; - } - -- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); -+ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); - - ret.id = reg_idx; - ret.allocation_size = 1; -@@ -4628,7 +4640,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - } - - static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, -- unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) -+ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) - { - unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; - unsigned int writemask; -@@ -4636,18 +4648,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig - - for (i = 0; i < (reg_size / 4); ++i) - { -- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip); - if (writemask != VKD3DSP_WRITEMASK_ALL) - return false; - } -- writemask = 
get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); - if ((writemask & last_reg_mask) != last_reg_mask) - return false; - return true; - } - - static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) -+ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) - { - struct hlsl_reg ret = {0}; - uint32_t reg_idx; -@@ -4655,15 +4667,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo - - for (reg_idx = 0;; ++reg_idx) - { -- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) -+ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) - break; - } - - for (i = 0; i < reg_size / 4; ++i) -- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); -+ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); - if (reg_size % 4) - record_allocation(ctx, allocator, reg_idx + (reg_size / 4), -- (1u << (reg_size % 4)) - 1, first_write, last_read, mode); -+ (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); - - ret.id = reg_idx; - ret.allocation_size = align(reg_size, 4) / 4; -@@ -4679,9 +4691,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ - - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); - else -- return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); -+ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); - } - - static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -4859,8 +4871,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, - } - - if (reg_writemask) -- instr->reg = allocate_register_with_masks(ctx, allocator, -- instr->index, instr->last_read, reg_writemask, dst_writemask, 0); -+ instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, -+ instr->last_read, reg_writemask, dst_writemask, 0, false); - else - instr->reg = allocate_numeric_registers_for_type(ctx, allocator, - instr->index, instr->last_read, instr->data_type); -@@ -5181,14 +5193,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - { - if (i < bind_count) - { -- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) -+ if (get_available_writemask(&allocator_used, 1, UINT_MAX, -+ reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Overlapping register() reservations on 'c%u'.", reg_idx + i); - } -- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); -+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); - } -- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); -+ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); - } - - var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; -@@ -5211,7 
+5224,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) - { -- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); - TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); - } -@@ -5254,7 +5267,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun - var = entry_func->parameters.vars[i]; - if (var->is_output_semantic) - { -- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); -+ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, -+ var->first_write, var->last_read, 0, false); - break; - } - } -@@ -5311,6 +5325,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - enum vkd3d_shader_register_type type; - struct vkd3d_shader_version version; -+ bool special_interpolation = false; -+ bool vip_allocation = false; - uint32_t reg; - bool builtin; - -@@ -5363,6 +5379,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - * domains, it is allocated as if it was 'float[1]'. */ - var->force_align = true; - } -+ -+ if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX -+ || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX -+ || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) -+ vip_allocation = true; -+ -+ if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) -+ special_interpolation = true; - } - - if (builtin) -@@ -5376,8 +5400,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - unsigned int reg_size = optimize ? 
var->data_type->dimx : 4; - -- var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, -- UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); -+ if (special_interpolation) -+ mode = VKD3DSIM_NONE; -+ -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, -+ reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); - - TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', - var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); -@@ -6812,7 +6839,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src - } - - static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, -- struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) -+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) - { - struct hlsl_ir_constant *constant; - -@@ -6832,6 +6859,242 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, - } - } - -+static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) -+{ -+ const struct hlsl_ir_var *var = deref->var; -+ unsigned int offset_const_deref; -+ -+ reg->type = var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; -+ reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ -+ if (!var->indexable) -+ { -+ offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx[0].offset += offset_const_deref / 4; -+ reg->idx_count = 1; -+ } -+ else -+ { -+ offset_const_deref = deref->const_offset; -+ reg->idx[1].offset = offset_const_deref / 4; -+ reg->idx_count = 2; -+ -+ if (deref->rel_offset.node) -+ { -+ struct vkd3d_shader_src_param *idx_src; -+ -+ if (!(idx_src = vsir_program_get_src_params(program, 1))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return false; -+ } -+ memset(idx_src, 0, sizeof(*idx_src)); -+ reg->idx[1].rel_addr = idx_src; -+ -+ vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); -+ } -+ } -+ -+ *writemask = 0xf & (0xf << (offset_const_deref % 4)); -+ if (var->regs[HLSL_REGSET_NUMERIC].writemask) -+ *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); -+ return true; -+} -+ -+static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); -+ const struct hlsl_ir_var *var = deref->var; -+ -+ if (var->is_uniform) -+ { -+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -+ -+ if (regset == HLSL_REGSET_TEXTURES) -+ { -+ reg->type = VKD3DSPR_RESOURCE; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = 
var->regs[HLSL_REGSET_TEXTURES].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (regset == HLSL_REGSET_UAVS) -+ { -+ reg->type = VKD3DSPR_UAV; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else if (regset == HLSL_REGSET_SAMPLERS) -+ { -+ reg->type = VKD3DSPR_SAMPLER; -+ reg->dimension = VSIR_DIMENSION_NONE; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); -+ *writemask = VKD3DSP_WRITEMASK_ALL; -+ } -+ else -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -+ -+ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); -+ reg->type = VKD3DSPR_CONSTBUFFER; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -+ reg->idx[2].offset = offset / 4; -+ reg->idx_count = 3; -+ } -+ else -+ { -+ reg->idx[0].offset = var->buffer->reg.index; -+ reg->idx[1].offset = offset / 4; -+ 
reg->idx_count = 2; -+ } -+ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); -+ } -+ } -+ else if (var->is_input_semantic) -+ { -+ bool has_idx; -+ -+ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ -+ if (has_idx) -+ { -+ reg->idx[0].offset = var->semantic.index + offset / 4; -+ reg->idx_count = 1; -+ } -+ -+ if (shader_sm4_is_scalar_register(reg)) -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ else -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ VKD3D_ASSERT(hlsl_reg.allocated); -+ -+ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ reg->type = VKD3DSPR_PATCHCONST; -+ else -+ reg->type = VKD3DSPR_INPUT; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+ } -+ else if (var->is_output_semantic) -+ { -+ bool has_idx; -+ -+ if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) -+ { -+ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -+ -+ if (has_idx) -+ { -+ reg->idx[0].offset = var->semantic.index + offset / 4; -+ reg->idx_count = 1; -+ } -+ -+ if (shader_sm4_is_scalar_register(reg)) -+ reg->dimension = VSIR_DIMENSION_SCALAR; -+ else -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -+ } -+ else -+ { -+ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -+ -+ VKD3D_ASSERT(hlsl_reg.allocated); -+ reg->type = VKD3DSPR_OUTPUT; -+ reg->dimension = VSIR_DIMENSION_VEC4; -+ reg->idx[0].offset = hlsl_reg.id; -+ reg->idx_count = 1; -+ *writemask = hlsl_reg.writemask; -+ } -+ } -+ else -+ { -+ return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); -+ } -+ return true; -+} -+ 
-+static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, -+ unsigned int dst_writemask, const struct vkd3d_shader_location *loc) -+{ -+ uint32_t writemask; -+ -+ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) -+ return false; -+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); -+ return true; -+} -+ -+static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, -+ const struct vkd3d_shader_location *loc, unsigned int writemask) -+{ -+ uint32_t reg_writemask; -+ -+ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, ®_writemask, deref)) -+ return false; -+ dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); -+ return true; -+} -+ - static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, - struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) - { -@@ -7059,13 +7322,10 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: - generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); - return true; - -- case HLSL_TYPE_BOOL: -- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); -- break; -- - case HLSL_TYPE_DOUBLE: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); - break; -@@ -7659,40 +7919,6 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - sm1_generate_vsir_block(ctx, &entry_func->body, program); - } - --static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) --{ -- struct vkd3d_shader_location *loc; -- struct hlsl_ir_node *vsir_instr; -- -- loc = 
&program->instructions.elements[program->instructions.count - 1].location; -- -- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) -- { -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -- return; -- } -- hlsl_block_add_instr(block, vsir_instr); --} -- --static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, -- struct vsir_program *program, struct hlsl_ir_node *instr) --{ -- struct vkd3d_shader_location *loc; -- struct hlsl_ir_node *vsir_instr; -- -- loc = &program->instructions.elements[program->instructions.count - 1].location; -- -- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, -- program->instructions.count - 1, instr->data_type, &instr->reg, loc))) -- { -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -- return; -- } -- -- list_add_before(&instr->entry, &vsir_instr->entry); -- hlsl_replace_node(instr, vsir_instr); --} -- - static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, - const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, - const struct vkd3d_shader_location *loc) -@@ -7806,8 +8032,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs - - if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) - ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -- -- add_last_vsir_instr_to_block(ctx, program, block); - } - - static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, -@@ -7819,8 +8043,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ - return; - - ins->declaration.count = temp_count; -- -- add_last_vsir_instr_to_block(ctx, program, block); - } - - static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, -@@ -7838,8 +8060,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, - 
ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; - ins->declaration.indexable_temp.component_count = comp_count; - ins->declaration.indexable_temp.has_function_scope = false; -- -- add_last_vsir_instr_to_block(ctx, program, block); - } - - static bool type_is_float(const struct hlsl_type *type) -@@ -8505,99 +8725,730 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, - } - } - --static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_store *store) - { -- struct vkd3d_string_buffer *dst_type_string; -- struct hlsl_ir_node *instr, *next; -- struct hlsl_ir_switch_case *c; -+ struct hlsl_ir_node *instr = &store->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; - -- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) -- { -- if (instr->data_type) -- { -- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -- { -- hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -- break; -- } -- } -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return false; - -- switch (instr->type) -- { -- case HLSL_IR_CALL: -- vkd3d_unreachable(); -+ dst_param = &ins->dst[0]; -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, -+ dst_param, &store->lhs, &instr->loc, store->writemask)) -+ return false; - -- case HLSL_IR_CONSTANT: -- /* In SM4 all constants are inlined. 
*/ -- break; -+ src_param = &ins->src[0]; -+ vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); - -- case HLSL_IR_EXPR: -- if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) -- break; -+ return true; -+} - -- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) -- replace_instr_with_last_vsir_instr(ctx, program, instr); -+/* Does this variable's data come directly from the API user, rather than -+ * being temporary or from a previous shader stage? I.e. is it a uniform or -+ * VS input? */ -+static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) -+{ -+ if (var->is_uniform) -+ return true; - -- hlsl_release_string_buffer(ctx, dst_type_string); -- break; -+ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; -+} - -- case HLSL_IR_IF: -- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); -- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); -- break; -+static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const struct hlsl_type *type = load->node.data_type; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct hlsl_ir_node *instr = &load->node; -+ struct vkd3d_shader_instruction *ins; -+ struct hlsl_constant_value value; - -- case HLSL_IR_LOOP: -- sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); -- break; -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); -+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) -+ { -+ /* Uniform bools can be specified as anything, but internal bools -+ * always have 0 for false and ~0 for true. Normalise that here. 
*/ - -- case HLSL_IR_SWITCH: -- LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) -- sm4_generate_vsir_block(ctx, &c->body, program); -- break; -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) -+ return false; - -- case HLSL_IR_SWIZZLE: -- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -- replace_instr_with_last_vsir_instr(ctx, program, instr); -- break; -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); - -- default: -- break; -- } -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) -+ return false; -+ -+ memset(&value, 0xff, sizeof(value)); -+ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, -+ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); -+ memset(&value, 0x00, sizeof(value)); -+ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, -+ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); -+ } -+ else -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) -+ return false; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) -+ return false; - } -+ return true; - } - --static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) -+static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_resource_store *store) - { -- bool is_patch_constant_func = func == ctx->patch_constant_func; -- struct hlsl_block block = {0}; -- struct hlsl_scope *scope; -- struct hlsl_ir_var *var; -- uint32_t temp_count; -- -- compute_liveness(ctx, func); -- 
mark_indexable_vars(ctx, func); -- temp_count = allocate_temp_registers(ctx, func); -- if (ctx->result) -- return; -- program->temp_count = max(program->temp_count, temp_count); -+ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); -+ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; -+ struct hlsl_ir_node *instr = &store->node; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int writemask; - -- hlsl_block_init(&block); -+ if (!store->resource.var->is_uniform) -+ { -+ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); -+ return false; -+ } - -- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { -- if ((var->is_input_semantic && var->last_read) -- || (var->is_output_semantic && var->first_write)) -- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); -+ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); -+ return false; - } - -- if (temp_count) -- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); -+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) -+ return false; - -- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, -+ &ins->dst[0], &store->resource, &instr->loc, writemask)) -+ return false; -+ } -+ else - { -- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -- { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) -+ return false; -+ -+ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, 
-+ &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) -+ return false; -+ } -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); -+ -+ return true; -+} -+ -+static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset) -+{ -+ struct hlsl_ir_constant *offset; -+ -+ VKD3D_ASSERT(texel_offset); -+ if (texel_offset->type != HLSL_IR_CONSTANT) -+ return false; -+ offset = hlsl_ir_constant(texel_offset); -+ -+ if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) -+ return false; -+ if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) -+ return false; -+ if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) -+ return false; -+ return true; -+} -+ -+static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi( -+ struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset) -+{ -+ struct hlsl_ir_constant *offset; -+ -+ if (!texel_offset) -+ return; -+ offset = hlsl_ir_constant(texel_offset); -+ -+ ins->texel_offset.u = offset->value.u[0].i; -+ ins->texel_offset.v = 0; -+ ins->texel_offset.w = 0; -+ if (offset->node.data_type->dimx > 1) -+ ins->texel_offset.v = offset->value.u[1].i; -+ if (offset->node.data_type->dimx > 2) -+ ins->texel_offset.w = offset->value.u[2].i; -+} -+ -+static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); -+ bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; -+ const struct hlsl_ir_node *sample_index = load->sample_index.node; -+ const 
struct hlsl_ir_node *texel_offset = load->texel_offset.node; -+ const struct hlsl_ir_node *coords = load->coords.node; -+ unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *instr = &load->node; -+ enum hlsl_sampler_dim dim = load->sampling_dim; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ bool multisampled; -+ -+ VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); -+ -+ multisampled = resource_type->class == HLSL_CLASS_TEXTURE -+ && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); -+ -+ if (uav) -+ opcode = VKD3DSIH_LD_UAV_TYPED; -+ else if (raw) -+ opcode = VKD3DSIH_LD_RAW; -+ else -+ opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) -+ return false; -+ -+ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) -+ { -+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve to integer literal in the range -8 to 7."); -+ return false; -+ } -+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ if (!uav) -+ { -+ /* Mipmap level is in the last component in the IR, but needs to be in -+ * the W component in the instruction. 
*/ -+ unsigned int dim_count = hlsl_sampler_dim_count(dim); -+ -+ if (dim_count == 1) -+ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; -+ if (dim_count == 2) -+ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; -+ } -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ if (multisampled) -+ { -+ if (sample_index->type == HLSL_IR_CONSTANT) -+ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, -+ &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0); -+ else if (version->major == 4 && version->minor == 0) -+ hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); -+ else -+ vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL); -+ } -+ return true; -+} -+ -+static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -+ const struct hlsl_ir_node *coords = load->coords.node; -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_deref *sampler = &load->sampler; -+ const struct hlsl_ir_node *instr = &load->node; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ unsigned int src_count; -+ -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_SAMPLE: -+ opcode = VKD3DSIH_SAMPLE; -+ src_count = 3; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ opcode = VKD3DSIH_SAMPLE_C; -+ src_count = 4; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ opcode = VKD3DSIH_SAMPLE_C_LZ; -+ src_count = 4; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ opcode = VKD3DSIH_SAMPLE_LOD; -+ src_count = 4; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ opcode = 
VKD3DSIH_SAMPLE_B; -+ src_count = 4; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ opcode = VKD3DSIH_SAMPLE_GRAD; -+ src_count = 5; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) -+ return false; -+ -+ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) -+ { -+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve to integer literal in the range -8 to 7."); -+ return false; -+ } -+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], -+ resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2], -+ sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -+ return false; -+ -+ if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B) -+ { -+ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ } -+ else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ) -+ { -+ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL); -+ } -+ else if (opcode == VKD3DSIH_SAMPLE_GRAD) -+ { -+ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL); -+ } -+ return true; -+} -+ -+static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct hlsl_ir_resource_load *load, uint32_t swizzle) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -+ 
const struct hlsl_ir_node *coords = load->coords.node; -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_deref *sampler = &load->sampler; -+ const struct hlsl_ir_node *instr = &load->node; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_opcode opcode; -+ -+ opcode = VKD3DSIH_GATHER4; -+ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) -+ { -+ if (!vkd3d_shader_ver_ge(version, 5, 0)) -+ { -+ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -+ "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -+ return false; -+ } -+ opcode = VKD3DSIH_GATHER4_PO; -+ } -+ -+ if (opcode == VKD3DSIH_GATHER4) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) -+ return false; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -+ return false; -+ ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[2].swizzle = swizzle; -+ } -+ else if (opcode == VKD3DSIH_GATHER4_PO) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) -+ return false; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ if 
(!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) -+ return false; -+ ins->src[3].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[3].swizzle = swizzle; -+ } -+ else -+ { -+ vkd3d_unreachable(); -+ } -+ return true; -+} -+ -+static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *instr = &load->node; -+ struct hlsl_type *type = instr->data_type; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) -+ return false; -+ -+ if (type->e.numeric.type == HLSL_TYPE_UINT) -+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ return true; -+} -+ -+static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_resource_load *load) -+{ -+ const struct hlsl_deref *resource = &load->resource; -+ const struct hlsl_ir_node *instr = &load->node; -+ struct hlsl_type *type = instr->data_type; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER -+ || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ { -+ hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers."); -+ return false; -+ } -+ -+ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2))) -+ return 
false; -+ -+ if (type->e.numeric.type == HLSL_TYPE_UINT) -+ ins->flags = VKD3DSI_RESINFO_UINT; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); -+ -+ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, -+ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) -+ return false; -+ -+ return true; -+} -+ -+static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_resource_load *load) -+{ -+ if (load->sampler.var && !load->sampler.var->is_uniform) -+ { -+ hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -+ return false; -+ } -+ -+ if (!load->resource.var->is_uniform) -+ { -+ hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); -+ return false; -+ } -+ -+ switch (load->load_type) -+ { -+ case HLSL_RESOURCE_LOAD: -+ return sm4_generate_vsir_instr_ld(ctx, program, load); -+ -+ case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_CMP: -+ case HLSL_RESOURCE_SAMPLE_CMP_LZ: -+ case HLSL_RESOURCE_SAMPLE_LOD: -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ /* Combined sample expressions were lowered. 
*/ -+ VKD3D_ASSERT(load->sampler.var); -+ return sm4_generate_vsir_instr_sample(ctx, program, load); -+ -+ case HLSL_RESOURCE_GATHER_RED: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); -+ -+ case HLSL_RESOURCE_GATHER_GREEN: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); -+ -+ case HLSL_RESOURCE_GATHER_BLUE: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); -+ -+ case HLSL_RESOURCE_GATHER_ALPHA: -+ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); -+ -+ case HLSL_RESOURCE_SAMPLE_INFO: -+ return sm4_generate_vsir_instr_sample_info(ctx, program, load); -+ -+ case HLSL_RESOURCE_RESINFO: -+ return sm4_generate_vsir_instr_resinfo(ctx, program, load); -+ -+ case HLSL_RESOURCE_SAMPLE_PROJ: -+ vkd3d_unreachable(); -+ -+ default: -+ return false; -+ } -+} -+ -+static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_jump *jump) -+{ -+ const struct hlsl_ir_node *instr = &jump->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ switch (jump->type) -+ { -+ case HLSL_IR_JUMP_BREAK: -+ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0); -+ -+ case HLSL_IR_JUMP_CONTINUE: -+ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0); -+ -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1))) -+ return false; -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL); -+ return true; -+ -+ case HLSL_IR_JUMP_RETURN: -+ vkd3d_unreachable(); -+ -+ default: -+ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ return false; -+ } -+} -+ -+static void 
sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); -+ -+static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) -+{ -+ struct hlsl_ir_node *instr = &iff->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) -+ return; -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL); -+ -+ sm4_generate_vsir_block(ctx, &iff->then_block, program); -+ -+ if (!list_empty(&iff->else_block.instrs)) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) -+ return; -+ sm4_generate_vsir_block(ctx, &iff->else_block, program); -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) -+ return; -+} -+ -+static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_loop *loop) -+{ -+ struct hlsl_ir_node *instr = &loop->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0))) -+ return; -+ -+ sm4_generate_vsir_block(ctx, &loop->body, program); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0))) -+ return; -+} -+ -+static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_switch *swi) -+{ -+ const struct hlsl_ir_node *selector = swi->selector.node; -+ struct hlsl_ir_node *instr = &swi->node; -+ struct vkd3d_shader_instruction *ins; -+ struct hlsl_ir_switch_case *cas; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1))) -+ 
return; -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL); -+ -+ LIST_FOR_EACH_ENTRY(cas, &swi->cases, struct hlsl_ir_switch_case, entry) -+ { -+ if (cas->is_default) -+ { -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0))) -+ return; -+ } -+ else -+ { -+ struct hlsl_constant_value value = {.u[0].u = cas->value}; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1))) -+ return; -+ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL); -+ } -+ -+ sm4_generate_vsir_block(ctx, &cas->body, program); -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0))) -+ return; -+} -+ -+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+{ -+ struct vkd3d_string_buffer *dst_type_string; -+ struct hlsl_ir_node *instr, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->data_type) -+ { -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -+ break; -+ } -+ } -+ -+ switch (instr->type) -+ { -+ case HLSL_IR_CALL: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_CONSTANT: -+ /* In SM4 all constants are inlined. 
*/ -+ break; -+ -+ case HLSL_IR_EXPR: -+ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) -+ break; -+ sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer); -+ hlsl_release_string_buffer(ctx, dst_type_string); -+ break; -+ -+ case HLSL_IR_IF: -+ sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); -+ break; -+ -+ case HLSL_IR_LOAD: -+ sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); -+ break; -+ -+ case HLSL_IR_LOOP: -+ sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); -+ break; -+ -+ case HLSL_IR_RESOURCE_LOAD: -+ sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); -+ break; -+ -+ case HLSL_IR_RESOURCE_STORE: -+ sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)); -+ break; -+ -+ case HLSL_IR_JUMP: -+ sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); -+ break; -+ -+ case HLSL_IR_STORE: -+ sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); -+ break; -+ -+ case HLSL_IR_SWITCH: -+ sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -+ break; -+ -+ default: -+ break; -+ } -+ } -+} -+ -+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) -+{ -+ bool is_patch_constant_func = func == ctx->patch_constant_func; -+ struct hlsl_block block = {0}; -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ uint32_t temp_count; -+ -+ compute_liveness(ctx, func); -+ mark_indexable_vars(ctx, func); -+ temp_count = allocate_temp_registers(ctx, func); -+ if (ctx->result) -+ return; -+ program->temp_count = max(program->temp_count, temp_count); -+ -+ hlsl_block_init(&block); -+ -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if 
((var->is_input_semantic && var->last_read) -+ || (var->is_output_semantic && var->first_write)) -+ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); -+ } -+ -+ if (temp_count) -+ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { - if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) - continue; - if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -@@ -8618,6 +9469,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - hlsl_block_cleanup(&block); - - sm4_generate_vsir_block(ctx, &func->body, program); -+ -+ generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); - } - - /* OBJECTIVE: Translate all the information from ctx and entry_func to the -@@ -8649,9 +9502,16 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - program->thread_group_size.z = ctx->thread_count[2]; - } - -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ generate_vsir_add_program_instruction(ctx, program, -+ &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); - sm4_generate_vsir_add_function(ctx, func, config_flags, program); - if (version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ generate_vsir_add_program_instruction(ctx, program, -+ &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); - sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); -+ } - } - - static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b0e89bededb..0c06db9ff15 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -201,6 +201,14 @@ static void 
src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 - src->reg.u.immconst_u32[0] = value; - } - -+static void vsir_src_param_init_io(struct vkd3d_shader_src_param *src, -+ enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count) -+{ -+ vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = vsir_swizzle_from_writemask(e->mask); -+} -+ - void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) - { - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); -@@ -278,6 +286,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader - param->shift = 0; - } - -+static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type, -+ const struct signature_element *e, unsigned int idx_count) -+{ -+ vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst->write_mask = e->mask; -+} -+ - static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); -@@ -743,6 +759,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - case VKD3DSIH_DCL_GLOBAL_FLAGS: - case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: -+ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - case VKD3DSIH_DCL_THREAD_GROUP: - case VKD3DSIH_DCL_UAV_TYPED: - vkd3d_shader_instruction_make_nop(ins); -@@ -1369,26 +1386,17 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param - } - } - --static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, -- enum vkd3d_shader_register_type reg_type, unsigned int idx_count) --{ -- param->write_mask = 
e->mask; -- param->modifiers = 0; -- param->shift = 0; -- vsir_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); --} -- - static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, - const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, - const struct vkd3d_shader_location *location) - { - struct vkd3d_shader_instruction *ins; -- struct vkd3d_shader_dst_param *param; - const struct signature_element *e; -- unsigned int i, count; -+ unsigned int i, count, stride = 0; - -- for (i = 0, count = 1; i < s->element_count; ++i) -- count += !!s->elements[i].used_mask; -+ for (i = 0; i < s->element_count; ++i) -+ stride += !!s->elements[i].used_mask; -+ count = 2 + 3 * stride; - - if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) - return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1399,31 +1407,75 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p - - ins = &normaliser->instructions.elements[dst]; - vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); -- ins->flags = 1; -- ++ins; -+ -+ ins = &normaliser->instructions.elements[dst + 1 + 3 * stride]; -+ vsir_instruction_init(ins, location, VKD3DSIH_RET); -+ -+ ins = &normaliser->instructions.elements[dst + 1]; - - for (i = 0; i < s->element_count; ++i) - { -+ struct vkd3d_shader_instruction *ins_in, *ins_out, *ins_mov; -+ struct vkd3d_shader_dst_param *param_in, *param_out; -+ - e = &s->elements[i]; - if (!e->used_mask) - continue; - -+ ins_in = ins; -+ ins_out = &ins[stride]; -+ ins_mov = &ins[2 * stride]; -+ - if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) - { -- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV); -- param = &ins->declaration.register_semantic.reg; -- ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); -+ 
vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT_SIV); -+ param_in = &ins_in->declaration.register_semantic.reg; -+ ins_in->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); -+ -+ vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT_SIV); -+ param_out = &ins_out->declaration.register_semantic.reg; -+ ins_out->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); - } - else - { -- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT); -- param = &ins->declaration.dst; -+ vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT); -+ param_in = &ins_in->declaration.dst; -+ -+ vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT); -+ param_out = &ins_out->declaration.dst; - } - -- shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); -- param->reg.idx[0].offset = input_control_point_count; -- param->reg.idx[1].offset = e->register_index; -- param->write_mask = e->mask; -+ vsir_dst_param_init_io(param_in, VKD3DSPR_INPUT, e, 2); -+ param_in->reg.idx[0].offset = input_control_point_count; -+ param_in->reg.idx[1].offset = e->register_index; -+ param_in->write_mask = e->mask; -+ -+ vsir_dst_param_init_io(param_out, VKD3DSPR_OUTPUT, e, 2); -+ param_out->reg.idx[0].offset = input_control_point_count; -+ param_out->reg.idx[1].offset = e->register_index; -+ param_out->write_mask = e->mask; -+ -+ vsir_instruction_init(ins_mov, location, VKD3DSIH_MOV); -+ ins_mov->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); -+ ins_mov->dst_count = 1; -+ ins_mov->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); -+ ins_mov->src_count = 1; -+ -+ if (!ins_mov->dst || ! 
ins_mov->src) -+ { -+ WARN("Failed to allocate dst/src param.\n"); -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ } -+ -+ vsir_dst_param_init_io(&ins_mov->dst[0], VKD3DSPR_OUTPUT, e, 2); -+ ins_mov->dst[0].reg.idx[0].offset = 0; -+ ins_mov->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; -+ ins_mov->dst[0].reg.idx[1].offset = e->register_index; -+ -+ vsir_src_param_init_io(&ins_mov->src[0], VKD3DSPR_INPUT, e, 2); -+ ins_mov->src[0].reg.idx[0].offset = 0; -+ ins_mov->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; -+ ins_mov->src[0].reg.idx[1].offset = e->register_index; - - ++ins; - } -@@ -2129,6 +2181,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par - - case VKD3DSPR_OUTCONTROLPOINT: - reg->type = VKD3DSPR_OUTPUT; -+ if (io_normaliser_is_in_fork_or_join_phase(normaliser)) -+ normaliser->use_vocp = true; - /* fall through */ - case VKD3DSPR_OUTPUT: - reg_idx = reg->idx[reg->idx_count - 1].offset; -@@ -2179,9 +2233,6 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - { - reg = &ins->declaration.dst.reg; - -- if (reg->type == VKD3DSPR_OUTCONTROLPOINT) -- normaliser->use_vocp = true; -- - /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their - * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ - if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) -@@ -6665,6 +6716,9 @@ struct validation_context - enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -+ -+ unsigned int outer_tess_idxs[4]; -+ unsigned int inner_tess_idxs[2]; - }; - - static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, -@@ -7185,6 +7239,10 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_register_without_indices(ctx, reg); - break; - -+ case VKD3DSPR_PRIMID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ - case VKD3DSPR_NULL: - vsir_validate_register_without_indices(ctx, reg); - break; -@@ -7201,6 +7259,18 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_uav_register(ctx, reg); - break; - -+ case VKD3DSPR_OUTPOINTID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_FORKINSTID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_JOININSTID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ - case VKD3DSPR_INCONTROLPOINT: - vsir_validate_io_register(ctx, reg); - break; -@@ -7213,6 +7283,38 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_io_register(ctx, reg); - break; - -+ case VKD3DSPR_TESSCOORD: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_THREADID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_THREADGROUPID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_LOCALTHREADID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_LOCALTHREADINDEX: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_COVERAGE: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_SAMPLEMASK: -+ 
vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_GSINSTID: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ - case VKD3DSPR_DEPTHOUTGE: - vsir_validate_register_without_indices(ctx, reg); - break; -@@ -7221,10 +7323,22 @@ static void vsir_validate_register(struct validation_context *ctx, - vsir_validate_register_without_indices(ctx, reg); - break; - -+ case VKD3DSPR_OUTSTENCILREF: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ - case VKD3DSPR_SSA: - vsir_validate_ssa_register(ctx, reg); - break; - -+ case VKD3DSPR_WAVELANECOUNT: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ -+ case VKD3DSPR_WAVELANEINDEX: -+ vsir_validate_register_without_indices(ctx, reg); -+ break; -+ - default: - break; - } -@@ -7469,9 +7583,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - const struct shader_signature *signature, enum vsir_signature_type signature_type, - unsigned int idx) - { -+ enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; - const char *signature_type_name = signature_type_names[signature_type]; - const struct signature_element *element = &signature->elements[idx]; -- bool integer_type = false; -+ bool integer_type = false, is_outer = false; -+ unsigned int semantic_index_max = 0; - - if (element->register_count == 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -@@ -7523,12 +7639,6 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - case VKD3D_SHADER_SV_INSTANCE_ID: - case VKD3D_SHADER_SV_IS_FRONT_FACE: - case VKD3D_SHADER_SV_SAMPLE_INDEX: -- case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: -- case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: -- case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: -- case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: -- case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: -- case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: - case VKD3D_SHADER_SV_TARGET: - case VKD3D_SHADER_SV_DEPTH: - 
case VKD3D_SHADER_SV_COVERAGE: -@@ -7537,6 +7647,37 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - case VKD3D_SHADER_SV_STENCIL_REF: - break; - -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; -+ semantic_index_max = 4; -+ is_outer = true; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; -+ semantic_index_max = 2; -+ is_outer = false; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; -+ semantic_index_max = 3; -+ is_outer = true; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; -+ semantic_index_max = 1; -+ is_outer = false; -+ break; -+ -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: -+ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; -+ semantic_index_max = 2; -+ is_outer = true; -+ break; -+ - default: - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid system value semantic %#x.", -@@ -7544,6 +7685,38 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - break; - } - -+ if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) -+ { -+ if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: System value semantic %#x is only valid " -+ "in the patch constant signature.", -+ idx, signature_type_name, element->sysval_semantic); -+ -+ if (ctx->program->tess_domain != expected_tess_domain) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", -+ idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); -+ -+ if 
(element->semantic_index >= semantic_index_max) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", -+ idx, signature_type_name, element->semantic_index, element->sysval_semantic); -+ } -+ else -+ { -+ unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; -+ -+ if (*idx_pos != ~0u) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", -+ idx, signature_type_name, element->semantic_index, element->sysval_semantic); -+ else -+ *idx_pos = idx; -+ } -+ } -+ - if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) - { - const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; -@@ -7643,6 +7816,109 @@ static void vsir_validate_signature(struct validation_context *ctx, - - for (i = 0; i < signature->element_count; ++i) - vsir_validate_signature_element(ctx, signature, signature_type, i); -+ -+ if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) -+ { -+ const struct signature_element *first_element, *element; -+ unsigned int expected_outer_count = 0; -+ unsigned int expected_inner_count = 0; -+ -+ switch (ctx->program->tess_domain) -+ { -+ case VKD3D_TESSELLATOR_DOMAIN_QUAD: -+ expected_outer_count = 4; -+ expected_inner_count = 2; -+ break; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: -+ expected_outer_count = 3; -+ expected_inner_count = 1; -+ break; -+ -+ case VKD3D_TESSELLATOR_DOMAIN_LINE: -+ expected_outer_count = 2; -+ expected_inner_count = 0; -+ break; -+ -+ default: -+ break; -+ } -+ -+ /* After I/O normalisation tessellation factors are merged in a single array. 
*/ -+ if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -+ { -+ expected_outer_count = min(1, expected_outer_count); -+ expected_inner_count = min(1, expected_inner_count); -+ } -+ -+ first_element = NULL; -+ for (i = 0; i < expected_outer_count; ++i) -+ { -+ if (ctx->outer_tess_idxs[i] == ~0u) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Missing outer system value semantic %u.", i); -+ } -+ else -+ { -+ element = &signature->elements[ctx->outer_tess_idxs[i]]; -+ -+ if (!first_element) -+ { -+ first_element = element; -+ continue; -+ } -+ -+ if (element->register_index != first_element->register_index + i) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid register index %u for outer system value semantic %u, expected %u.", -+ element->register_index, i, first_element->register_index + i); -+ } -+ -+ if (element->mask != first_element->mask) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Invalid mask %#x for outer system value semantic %u, expected %#x.", -+ element->mask, i, first_element->mask); -+ } -+ } -+ } -+ -+ first_element = NULL; -+ for (i = 0; i < expected_inner_count; ++i) -+ { -+ if (ctx->inner_tess_idxs[i] == ~0u) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Missing inner system value semantic %u.", i); -+ } -+ else -+ { -+ element = &signature->elements[ctx->inner_tess_idxs[i]]; -+ -+ if (!first_element) -+ { -+ first_element = element; -+ continue; -+ } -+ -+ if (element->register_index != first_element->register_index + i) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid register index %u for inner system value semantic %u, expected %u.", -+ element->register_index, i, first_element->register_index + i); -+ } -+ -+ if (element->mask != first_element->mask) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Invalid mask %#x for inner system value semantic 
%u, expected %#x.", -+ element->mask, i, first_element->mask); -+ } -+ } -+ } -+ } - } - - static const char *name_from_cf_type(enum vsir_control_flow_type type) -@@ -7754,6 +8030,39 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, - instruction->declaration.max_tessellation_factor); - } - -+static void vsir_validate_dcl_input(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.dst.reg.type) -+ { -+ /* Signature input registers. */ -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_INCONTROLPOINT: -+ case VKD3DSPR_OUTCONTROLPOINT: -+ case VKD3DSPR_PATCHCONST: -+ /* Non-signature input registers. */ -+ case VKD3DSPR_PRIMID: -+ case VKD3DSPR_FORKINSTID: -+ case VKD3DSPR_JOININSTID: -+ case VKD3DSPR_THREADID: -+ case VKD3DSPR_THREADGROUPID: -+ case VKD3DSPR_LOCALTHREADID: -+ case VKD3DSPR_LOCALTHREADINDEX: -+ case VKD3DSPR_COVERAGE: -+ case VKD3DSPR_TESSCOORD: -+ case VKD3DSPR_OUTPOINTID: -+ case VKD3DSPR_GSINSTID: -+ case VKD3DSPR_WAVELANECOUNT: -+ case VKD3DSPR_WAVELANEINDEX: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_INPUT.", -+ instruction->declaration.dst.reg.type); -+ } -+} -+ - static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction) - { -@@ -7763,6 +8072,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, - instruction->declaration.primitive_type.type); - } - -+static void vsir_validate_dcl_input_ps(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.dst.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_INPUT_PS.", -+ 
instruction->declaration.dst.reg.type); -+ } -+} -+ -+static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.register_semantic.reg.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_INPUT_PS_SGV.", -+ instruction->declaration.register_semantic.reg.reg.type); -+ } -+} -+ -+static void vsir_validate_dcl_input_ps_siv(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.register_semantic.reg.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.", -+ instruction->declaration.register_semantic.reg.reg.type); -+ } -+} -+ -+static void vsir_validate_dcl_input_sgv(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.register_semantic.reg.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_INPUT_SGV.", -+ instruction->declaration.register_semantic.reg.reg.type); -+ } -+} -+ -+static void vsir_validate_dcl_input_siv(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.register_semantic.reg.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_PATCHCONST: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_INPUT_SIV.", -+ instruction->declaration.register_semantic.reg.reg.type); -+ } -+} -+ -+static void vsir_validate_dcl_output(struct 
validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.dst.reg.type) -+ { -+ /* Signature output registers. */ -+ case VKD3DSPR_OUTPUT: -+ case VKD3DSPR_PATCHCONST: -+ /* Non-signature output registers. */ -+ case VKD3DSPR_DEPTHOUT: -+ case VKD3DSPR_SAMPLEMASK: -+ case VKD3DSPR_DEPTHOUTGE: -+ case VKD3DSPR_DEPTHOUTLE: -+ case VKD3DSPR_OUTSTENCILREF: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_OUTPUT.", -+ instruction->declaration.dst.reg.type); -+ } -+} -+ - static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction) - { -@@ -7772,6 +8180,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte - instruction->declaration.count); - } - -+static void vsir_validate_dcl_output_siv(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ switch (instruction->declaration.register_semantic.reg.reg.type) -+ { -+ case VKD3DSPR_OUTPUT: -+ case VKD3DSPR_PATCHCONST: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in instruction DCL_OUTPUT_SIV.", -+ instruction->declaration.register_semantic.reg.reg.type); -+ } -+} -+ - static void vsir_validate_dcl_output_topology(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction) - { -@@ -7801,6 +8225,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, - || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, - "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); -+ -+ if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) -+ 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", -+ instruction->declaration.tessellator_domain, ctx->program->tess_domain); - } - - static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, -@@ -8063,8 +8492,16 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ - [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, - [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, - [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, -+ [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, - [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, -+ [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, -+ [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv}, -+ [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv}, -+ [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv}, -+ [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv}, -+ [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output}, - [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, -+ [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv}, - [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, - [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, - [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, -@@ -8177,6 +8614,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - .status = VKD3D_OK, - .phase = VKD3DSIH_INVALID, - .invalid_instruction_idx = true, -+ .outer_tess_idxs[0] = ~0u, -+ .outer_tess_idxs[1] = ~0u, -+ .outer_tess_idxs[2] = ~0u, -+ .outer_tess_idxs[3] = ~0u, -+ .inner_tess_idxs[0] = ~0u, -+ .inner_tess_idxs[1] = ~0u, - }; - unsigned int i; - -@@ 
-8187,12 +8630,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - { - case VKD3D_SHADER_TYPE_HULL: - case VKD3D_SHADER_TYPE_DOMAIN: -+ if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID -+ || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Invalid tessellation domain %#x.", program->tess_domain); - break; - - default: - if (program->patch_constant_signature.element_count != 0) - validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "Patch constant signature is only valid for hull and domain shaders."); -+ -+ if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, -+ "Invalid tessellation domain %#x.", program->tess_domain); - } - - switch (program->shader_version.type) -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index df3edeaa4e6..29f51088728 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -153,6 +153,64 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, - msl_print_register_datatype(buffer, gen, reg->data_type); - break; - -+ case VKD3DSPR_IMMCONST: -+ switch (reg->dimension) -+ { -+ case VSIR_DIMENSION_SCALAR: -+ switch (reg->data_type) -+ { -+ case VKD3D_DATA_INT: -+ vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); -+ break; -+ case VKD3D_DATA_UINT: -+ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); -+ break; -+ case VKD3D_DATA_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "", reg->data_type); -+ break; -+ } -+ break; -+ -+ case VSIR_DIMENSION_VEC4: 
-+ switch (reg->data_type) -+ { -+ case VKD3D_DATA_INT: -+ vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", -+ reg->u.immconst_u32[0], reg->u.immconst_u32[1], -+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); -+ break; -+ case VKD3D_DATA_UINT: -+ vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", -+ reg->u.immconst_u32[0], reg->u.immconst_u32[1], -+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); -+ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); -+ break; -+ case VKD3D_DATA_FLOAT: -+ vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", -+ reg->u.immconst_u32[0], reg->u.immconst_u32[1], -+ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); -+ vkd3d_string_buffer_printf(buffer, "", reg->data_type); -+ break; -+ } -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "", reg->dimension); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled dimension %#x.", reg->dimension); -+ break; -+ } -+ break; -+ - case VKD3DSPR_CONSTBUFFER: - if (reg->idx_count != 3) - { -@@ -215,19 +273,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, - const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) - { - const struct vkd3d_shader_register *reg = &vsir_src->reg; -+ struct vkd3d_string_buffer *str; - - msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); - - if (reg->non_uniform) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled 'non-uniform' modifier."); -- if (vsir_src->modifiers) -- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -- "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - -- msl_print_register_name(msl_src->str, gen, reg); -+ if 
(!vsir_src->modifiers) -+ str = msl_src->str; -+ else -+ str = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ msl_print_register_name(str, gen, reg); - if (reg->dimension == VSIR_DIMENSION_VEC4) -- msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); -+ msl_print_swizzle(str, vsir_src->swizzle, mask); -+ -+ switch (vsir_src->modifiers) -+ { -+ case VKD3DSPSM_NONE: -+ break; -+ case VKD3DSPSM_NEG: -+ vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer); -+ break; -+ case VKD3DSPSM_ABS: -+ vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer); -+ break; -+ default: -+ vkd3d_string_buffer_printf(msl_src->str, "(%s)", -+ vsir_src->modifiers, str->buffer); -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); -+ break; -+ } -+ -+ if (str != msl_src->str) -+ vkd3d_string_buffer_release(&gen->string_buffers, str); - } - - static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) -@@ -288,6 +370,80 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i - "Internal compiler error: Unhandled instruction %#x.", ins->opcode); - } - -+static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) -+{ -+ struct msl_src src[2]; -+ struct msl_dst dst; -+ uint32_t mask; -+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src[0], gen, &ins->src[0], mask); -+ msl_src_init(&src[1], gen, &ins->src[1], mask); -+ -+ msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); -+ -+ msl_src_cleanup(&src[1], &gen->string_buffers); -+ msl_src_cleanup(&src[0], &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) -+{ -+ struct msl_src src; -+ struct msl_dst dst; -+ uint32_t mask; 
-+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src, gen, &ins->src[0], mask); -+ -+ msl_print_assignment(gen, &dst, "%s(%s)", op, src.str->buffer); -+ -+ msl_src_cleanup(&src, &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) -+{ -+ unsigned int mask_size; -+ struct msl_src src[2]; -+ struct msl_dst dst; -+ uint32_t mask; -+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src[0], gen, &ins->src[0], mask); -+ msl_src_init(&src[1], gen, &ins->src[1], mask); -+ -+ if ((mask_size = vsir_write_mask_component_count(mask)) > 1) -+ msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))", -+ mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer); -+ else -+ msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", -+ src[0].str->buffer, op, src[1].str->buffer); -+ -+ msl_src_cleanup(&src[1], &gen->string_buffers); -+ msl_src_cleanup(&src[0], &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) -+{ -+ unsigned int component_count; -+ struct msl_src src; -+ struct msl_dst dst; -+ uint32_t mask; -+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src, gen, &ins->src[0], mask); -+ -+ if ((component_count = vsir_write_mask_component_count(mask)) > 1) -+ msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); -+ else -+ msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); -+ -+ msl_src_cleanup(&src, &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ - static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - struct msl_src src; -@@ -303,6 +459,31 @@ 
static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc - msl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ unsigned int component_count; -+ struct msl_src src[3]; -+ struct msl_dst dst; -+ uint32_t mask; -+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src[0], gen, &ins->src[0], mask); -+ msl_src_init(&src[1], gen, &ins->src[1], mask); -+ msl_src_init(&src[2], gen, &ins->src[2], mask); -+ -+ if ((component_count = vsir_write_mask_component_count(mask)) > 1) -+ msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))", -+ src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); -+ else -+ msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))", -+ src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); -+ -+ msl_src_cleanup(&src[2], &gen->string_buffers); -+ msl_src_cleanup(&src[1], &gen->string_buffers); -+ msl_src_cleanup(&src[0], &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ - static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - msl_print_indent(gen->buffer, gen->indent); -@@ -315,17 +496,61 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - - switch (ins->opcode) - { -+ case VKD3DSIH_ADD: -+ msl_binop(gen, ins, "+"); -+ break; -+ case VKD3DSIH_AND: -+ msl_binop(gen, ins, "&"); -+ break; - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; -+ case VKD3DSIH_DIV: -+ msl_binop(gen, ins, "/"); -+ break; -+ case VKD3DSIH_FRC: -+ msl_intrinsic(gen, ins, "fract"); -+ break; -+ case VKD3DSIH_FTOI: -+ msl_cast(gen, ins, "int"); -+ break; -+ case VKD3DSIH_FTOU: -+ msl_cast(gen, ins, "uint"); -+ break; -+ case VKD3DSIH_GEO: -+ msl_relop(gen, ins, ">="); -+ break; -+ case VKD3DSIH_INE: -+ case VKD3DSIH_NEU: -+ 
msl_relop(gen, ins, "!="); -+ break; -+ case VKD3DSIH_ITOF: -+ case VKD3DSIH_UTOF: -+ msl_cast(gen, ins, "float"); -+ break; - case VKD3DSIH_MOV: - msl_mov(gen, ins); - break; -+ case VKD3DSIH_MOVC: -+ msl_movc(gen, ins); -+ break; -+ case VKD3DSIH_MUL: -+ msl_binop(gen, ins, "*"); -+ break; -+ case VKD3DSIH_OR: -+ msl_binop(gen, ins, "|"); -+ break; - case VKD3DSIH_RET: - msl_ret(gen, ins); - break; -+ case VKD3DSIH_ROUND_PI: -+ msl_intrinsic(gen, ins, "ceil"); -+ break; -+ case VKD3DSIH_ROUND_Z: -+ msl_intrinsic(gen, ins, "trunc"); -+ break; - default: - msl_unhandled(gen, ins); - break; -@@ -790,6 +1015,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader - MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); - - vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); -+ vkd3d_string_buffer_printf(gen->buffer, "#include \n\n"); -+ vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); - - if (gen->program->global_flags) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index 4a8d0fddae1..d167415c356 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -20,6 +20,7 @@ - - %{ - -+#include "preproc.h" - #include "preproc.tab.h" - - #undef ERROR /* defined in wingdi.h */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index bdfd632ad12..005b40a9d1f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2471,7 +2471,6 @@ struct spirv_compiler - bool emit_point_size; - - enum vkd3d_shader_opcode phase; -- bool emit_default_control_point_phase; - struct vkd3d_shader_phase control_point_phase; - struct vkd3d_shader_phase patch_constant_phase; - -@@ -6822,15 +6821,11 @@ static void 
spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler - compiler->spirv_builder.invocation_count = instruction->declaration.count; - } - --static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, -+ enum vkd3d_tessellator_domain domain) - { -- enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; - SpvExecutionMode mode; - -- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) -- return; -- - switch (domain) - { - case VKD3D_TESSELLATOR_DOMAIN_LINE: -@@ -6916,15 +6911,10 @@ static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compile - SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); - } - --static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); -- - static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - -- if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) -- spirv_compiler_emit_default_control_point_phase(compiler); -- - vkd3d_spirv_build_op_function_end(builder); - - if (is_in_control_point_phase(compiler)) -@@ -6969,9 +6959,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - phase->function_id = function_id; - /* The insertion location must be set after the label is emitted. 
*/ - phase->function_location = 0; -- -- if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) -- compiler->emit_default_control_point_phase = instruction->flags; - } - - static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) -@@ -7000,63 +6987,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) - } - } - --static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) --{ -- const struct shader_signature *output_signature = &compiler->output_signature; -- const struct shader_signature *input_signature = &compiler->input_signature; -- uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id; -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- enum vkd3d_shader_component_type component_type; -- struct vkd3d_shader_src_param invocation; -- struct vkd3d_shader_register input_reg; -- unsigned int component_count; -- unsigned int i; -- -- vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); -- spirv_compiler_initialise_block(compiler); -- invocation_id = spirv_compiler_emit_load_invocation_id(compiler); -- -- memset(&invocation, 0, sizeof(invocation)); -- vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_INT, 0); -- invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; -- -- vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2); -- input_reg.idx[0].offset = 0; -- input_reg.idx[0].rel_addr = &invocation; -- input_reg.idx[1].offset = 0; -- input_id = spirv_compiler_get_register_id(compiler, &input_reg); -- -- VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); -- for (i = 0; i < output_signature->element_count; ++i) -- { -- const struct signature_element *output = &output_signature->elements[i]; -- const struct signature_element *input = &input_signature->elements[i]; -- struct vkd3d_shader_register_info output_reg_info; -- struct vkd3d_shader_register output_reg; -- -- 
VKD3D_ASSERT(input->mask == output->mask); -- VKD3D_ASSERT(input->component_type == output->component_type); -- -- input_reg.idx[1].offset = i; -- input_id = spirv_compiler_get_register_id(compiler, &input_reg); -- -- vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); -- output_reg.idx[0].offset = i; -- spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info); -- -- component_type = output->component_type; -- component_count = vsir_write_mask_component_count(output->mask); -- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); -- output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); -- -- dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id); -- -- vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); -- } -- -- vkd3d_spirv_build_op_return(builder); --} -- - static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, - SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) - { -@@ -10239,9 +10169,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - spirv_compiler_emit_output_vertex_count(compiler, instruction); - break; -- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: -- spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); -- break; - case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: - spirv_compiler_emit_tessellator_output_primitive(compiler, - instruction->declaration.tessellator_output_primitive); -@@ -10743,6 +10670,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - compiler->input_control_point_count = program->input_control_point_count; - compiler->output_control_point_count = program->output_control_point_count; - -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && 
!spirv_compiler_is_opengl_target(compiler)) -+ spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); -+ - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 9c41e2c2053..409328b2e53 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -707,6 +707,7 @@ struct vkd3d_sm4_opcode_info - char src_info[SM4_MAX_SRC_COUNT]; - void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, - const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); -+ bool is_conditional_op; - }; - - static const enum vkd3d_primitive_type output_primitive_type_table[] = -@@ -1268,6 +1269,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi - { - ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -+ priv->p.program->tess_domain = ins->declaration.tessellator_domain; - } - - static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1439,18 +1441,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, - {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, - {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", -- shader_sm4_read_conditional_op}, -+ shader_sm4_read_conditional_op, true}, - {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", - shader_sm4_read_case_condition}, - {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, - {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", -- shader_sm4_read_conditional_op}, -+ shader_sm4_read_conditional_op, true}, - {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, - {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, - {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, - 
{VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, - {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", -- shader_sm4_read_conditional_op}, -+ shader_sm4_read_conditional_op, true}, - {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, - {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, - {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, -@@ -1468,7 +1470,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, - {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, - {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", -- shader_sm4_read_conditional_op}, -+ shader_sm4_read_conditional_op, true}, - {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, - {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, - {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, -@@ -1502,7 +1504,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, - {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, - {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", -- shader_sm4_read_conditional_op}, -+ shader_sm4_read_conditional_op, true}, - {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, - {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, - {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, -@@ -1967,16 +1969,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t - return lookup->register_type_info_from_vkd3d[vkd3d_type]; - } - --static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( -- const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) --{ -- const struct vkd3d_sm4_register_type_info *register_type_info = -- get_info_from_vkd3d_register_type(lookup, vkd3d_type); -- -- VKD3D_ASSERT(register_type_info); -- return register_type_info->default_src_swizzle_type; --} -- - static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) - { -@@ -2993,8 
+2985,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - return VKD3D_OK; - } - --static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); -- - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) - { -@@ -3331,6 +3321,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_HULL_SHADER: - case HLSL_CLASS_GEOMETRY_SHADER: - case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STREAM_OUTPUT: - case HLSL_CLASS_NULL: - break; - } -@@ -4096,297 +4087,6 @@ struct sm4_instruction - unsigned int idx_src_count; - }; - --static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, -- const struct hlsl_ir_node *instr) --{ -- VKD3D_ASSERT(instr->reg.allocated); -- reg->type = VKD3DSPR_TEMP; -- reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = instr->reg.id; -- reg->idx_count = 1; -- *writemask = instr->reg.writemask; --} -- --static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, -- enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, -- struct sm4_instruction *sm4_instr) --{ -- const struct hlsl_ir_var *var = deref->var; -- unsigned int offset_const_deref; -- -- reg->type = type; -- reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; -- reg->dimension = VSIR_DIMENSION_VEC4; -- -- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); -- -- if (!var->indexable) -- { -- offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); -- reg->idx[0].offset += offset_const_deref / 4; -- reg->idx_count = 1; -- } -- else -- { -- offset_const_deref = deref->const_offset; -- reg->idx[1].offset = offset_const_deref / 4; -- reg->idx_count = 2; -- -- if (deref->rel_offset.node) -- { -- struct vkd3d_shader_src_param *idx_src; -- unsigned 
int idx_writemask; -- -- VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); -- idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; -- memset(idx_src, 0, sizeof(*idx_src)); -- -- reg->idx[1].rel_addr = idx_src; -- sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); -- VKD3D_ASSERT(idx_writemask != 0); -- idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); -- } -- } -- -- *writemask = 0xf & (0xf << (offset_const_deref % 4)); -- if (var->regs[HLSL_REGSET_NUMERIC].writemask) -- *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); --} -- --static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, -- uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) --{ -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); -- const struct hlsl_ir_var *var = deref->var; -- struct hlsl_ctx *ctx = tpf->ctx; -- -- if (var->is_uniform) -- { -- enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -- -- if (regset == HLSL_REGSET_TEXTURES) -- { -- reg->type = VKD3DSPR_RESOURCE; -- reg->dimension = VSIR_DIMENSION_VEC4; -- if (vkd3d_shader_ver_ge(version, 5, 1)) -- { -- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -- reg->idx_count = 2; -- } -- else -- { -- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -- reg->idx_count = 1; -- } -- VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); -- *writemask = VKD3DSP_WRITEMASK_ALL; -- } -- else if (regset == HLSL_REGSET_UAVS) -- { -- reg->type = VKD3DSPR_UAV; -- reg->dimension = VSIR_DIMENSION_VEC4; -- if (vkd3d_shader_ver_ge(version, 5, 1)) -- { -- reg->idx[0].offset = 
var->regs[HLSL_REGSET_UAVS].id; -- reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -- reg->idx_count = 2; -- } -- else -- { -- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -- reg->idx_count = 1; -- } -- VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); -- *writemask = VKD3DSP_WRITEMASK_ALL; -- } -- else if (regset == HLSL_REGSET_SAMPLERS) -- { -- reg->type = VKD3DSPR_SAMPLER; -- reg->dimension = VSIR_DIMENSION_NONE; -- if (vkd3d_shader_ver_ge(version, 5, 1)) -- { -- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -- reg->idx_count = 2; -- } -- else -- { -- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -- reg->idx_count = 1; -- } -- VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); -- *writemask = VKD3DSP_WRITEMASK_ALL; -- } -- else -- { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; -- -- VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); -- reg->type = VKD3DSPR_CONSTBUFFER; -- reg->dimension = VSIR_DIMENSION_VEC4; -- if (vkd3d_shader_ver_ge(version, 5, 1)) -- { -- reg->idx[0].offset = var->buffer->reg.id; -- reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -- reg->idx[2].offset = offset / 4; -- reg->idx_count = 3; -- } -- else -- { -- reg->idx[0].offset = var->buffer->reg.index; -- reg->idx[1].offset = offset / 4; -- reg->idx_count = 2; -- } -- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); -- } -- } -- else if (var->is_input_semantic) -- { -- bool has_idx; -- -- if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) -- { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -- -- if (has_idx) -- { -- reg->idx[0].offset = var->semantic.index + offset / 4; -- 
reg->idx_count = 1; -- } -- -- if (shader_sm4_is_scalar_register(reg)) -- reg->dimension = VSIR_DIMENSION_SCALAR; -- else -- reg->dimension = VSIR_DIMENSION_VEC4; -- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -- } -- else -- { -- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -- -- VKD3D_ASSERT(hlsl_reg.allocated); -- -- if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -- reg->type = VKD3DSPR_PATCHCONST; -- else -- reg->type = VKD3DSPR_INPUT; -- reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = hlsl_reg.id; -- reg->idx_count = 1; -- *writemask = hlsl_reg.writemask; -- } -- } -- else if (var->is_output_semantic) -- { -- bool has_idx; -- -- if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) -- { -- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -- -- if (has_idx) -- { -- reg->idx[0].offset = var->semantic.index + offset / 4; -- reg->idx_count = 1; -- } -- -- if (shader_sm4_is_scalar_register(reg)) -- reg->dimension = VSIR_DIMENSION_SCALAR; -- else -- reg->dimension = VSIR_DIMENSION_VEC4; -- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); -- } -- else -- { -- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); -- -- VKD3D_ASSERT(hlsl_reg.allocated); -- reg->type = VKD3DSPR_OUTPUT; -- reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = hlsl_reg.id; -- reg->idx_count = 1; -- *writemask = hlsl_reg.writemask; -- } -- } -- else -- { -- enum vkd3d_shader_register_type type = deref->var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; -- -- sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); -- } --} -- --static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, -- const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) --{ -- unsigned int hlsl_swizzle; -- uint32_t writemask; -- -- sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); -- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) -- { -- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -- src->swizzle = swizzle_from_sm4(hlsl_swizzle); -- } --} -- --static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) --{ -- sm4_register_from_node(&dst->reg, &dst->write_mask, instr); --} -- --static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, -- const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) --{ -- src->swizzle = 0; -- src->reg.type = VKD3DSPR_IMMCONST; -- if (width == 1) -- { -- src->reg.dimension = VSIR_DIMENSION_SCALAR; -- src->reg.u.immconst_u32[0] = value->u[0].u; -- } -- else -- { -- unsigned int i, j = 0; -- -- src->reg.dimension = VSIR_DIMENSION_VEC4; -- for (i = 0; i < 4; ++i) -- { -- if ((map_writemask & (1u << i)) && (j < width)) -- src->reg.u.immconst_u32[i] = value->u[j++].u; -- else -- src->reg.u.immconst_u32[i] = 0; -- } -- } --} -- --static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, -- const struct hlsl_ir_node *instr, uint32_t map_writemask) --{ -- unsigned int hlsl_swizzle; -- uint32_t writemask; -- -- if (instr->type == HLSL_IR_CONSTANT) -- { -- struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); -- -- sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); -- return; -- } -- -- 
sm4_register_from_node(&src->reg, &writemask, instr); -- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) -- { -- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -- src->swizzle = swizzle_from_sm4(hlsl_swizzle); -- } --} -- - static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, - unsigned int i) - { -@@ -4650,33 +4350,6 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s - sm4_update_stat_counters(tpf, instr); - } - --static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, -- const struct hlsl_ir_node *texel_offset) --{ -- struct sm4_instruction_modifier modif; -- struct hlsl_ir_constant *offset; -- -- if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) -- return false; -- offset = hlsl_ir_constant(texel_offset); -- -- modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; -- modif.u.aoffimmi.u = offset->value.u[0].i; -- modif.u.aoffimmi.v = 0; -- modif.u.aoffimmi.w = 0; -- if (offset->node.data_type->dimx > 1) -- modif.u.aoffimmi.v = offset->value.u[1].i; -- if (offset->node.data_type->dimx > 2) -- modif.u.aoffimmi.w = offset->value.u[2].i; -- if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 -- || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 -- || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) -- return false; -- -- instr->modifiers[instr->modifier_count++] = modif; -- return true; --} -- - static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) - { - size_t size = (cbuffer->used_size + 3) / 4; -@@ -4945,26 +4618,6 @@ static void tpf_write_hs_decls(const struct tpf_compiler *tpf) - write_sm4_instruction(tpf, &instr); - } - --static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, -- }; -- -- 
write_sm4_instruction(tpf, &instr); --} -- --static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, -- }; -- -- write_sm4_instruction(tpf, &instr); --} -- - static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) - { - struct sm4_instruction instr = -@@ -5022,594 +4675,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ret(const struct tpf_compiler *tpf) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_RET, -- }; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, -- const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, -- enum hlsl_sampler_dim dim) --{ -- const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); -- bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE -- && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); -- bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; -- unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- if (uav) -- instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; -- else if (raw) -- instr.opcode = VKD3D_SM5_OP_LD_RAW; -- else -- instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7."); -- return; -- } -- } -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- if (!uav) -- { -- /* Mipmap level is in the last component in the IR, but needs to be in the W -- * component in the instruction. */ -- unsigned int dim_count = hlsl_sampler_dim_count(dim); -- -- if (dim_count == 1) -- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; -- if (dim_count == 2) -- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; -- } -- -- sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask); -- -- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -- -- instr.src_count = 2; -- -- if (multisampled) -- { -- if (sample_index->type == HLSL_IR_CONSTANT) -- { -- struct vkd3d_shader_register *reg = &instr.srcs[2].reg; -- struct hlsl_ir_constant *index; -- -- index = hlsl_ir_constant(sample_index); -- -- memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); -- reg->type = VKD3DSPR_IMMCONST; -- reg->dimension = VSIR_DIMENSION_SCALAR; -- reg->u.immconst_u32[0] = index->value.u[0].u; -- } -- else if (version->major == 4 && version->minor == 0) -- { -- hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); -- } -- else -- { -- sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0); -- } -- -- ++instr.src_count; -- } -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -- const struct hlsl_ir_node *coords = load->coords.node; -- const struct hlsl_deref *resource 
= &load->resource; -- const struct hlsl_deref *sampler = &load->sampler; -- const struct hlsl_ir_node *dst = &load->node; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- switch (load->load_type) -- { -- case HLSL_RESOURCE_SAMPLE: -- instr.opcode = VKD3D_SM4_OP_SAMPLE; -- break; -- -- case HLSL_RESOURCE_SAMPLE_CMP: -- instr.opcode = VKD3D_SM4_OP_SAMPLE_C; -- break; -- -- case HLSL_RESOURCE_SAMPLE_CMP_LZ: -- instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; -- break; -- -- case HLSL_RESOURCE_SAMPLE_LOD: -- instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; -- break; -- -- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -- instr.opcode = VKD3D_SM4_OP_SAMPLE_B; -- break; -- -- case HLSL_RESOURCE_SAMPLE_GRAD: -- instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; -- break; -- -- default: -- vkd3d_unreachable(); -- } -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7."); -- return; -- } -- } -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -- sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); -- instr.src_count = 3; -- -- if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD -- || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) -- { -- sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); -- ++instr.src_count; -- } -- else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) -- { -- sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); -- instr.src_count += 2; -- } -- else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP -- || load->load_type == 
HLSL_RESOURCE_SAMPLE_CMP_LZ) -- { -- sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); -- ++instr.src_count; -- } -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_deref *resource = &load->resource; -- const struct hlsl_ir_node *dst = &load->node; -- struct sm4_instruction instr; -- -- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) -- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_deref *resource = &load->resource; -- const struct hlsl_ir_node *dst = &load->node; -- struct sm4_instruction instr; -- -- if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER -- || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -- { -- hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); -- return; -- } -- -- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_RESINFO; -- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) -- instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, 
VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_IF, -- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, -- .src_count = 1, -- }; -- -- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); -- -- sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -- write_sm4_instruction(tpf, &instr); -- -- write_sm4_block(tpf, &iff->then_block); -- -- if (!list_empty(&iff->else_block.instrs)) -- { -- instr.opcode = VKD3D_SM4_OP_ELSE; -- instr.src_count = 0; -- write_sm4_instruction(tpf, &instr); -- -- write_sm4_block(tpf, &iff->else_block); -- } -- -- instr.opcode = VKD3D_SM4_OP_ENDIF; -- instr.src_count = 0; -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) --{ -- struct sm4_instruction instr = {0}; -- -- switch (jump->type) -- { -- case HLSL_IR_JUMP_BREAK: -- instr.opcode = VKD3D_SM4_OP_BREAK; -- break; -- -- case HLSL_IR_JUMP_CONTINUE: -- instr.opcode = VKD3D_SM4_OP_CONTINUE; -- break; -- -- case HLSL_IR_JUMP_DISCARD_NZ: -- { -- instr.opcode = VKD3D_SM4_OP_DISCARD; -- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; -- -- memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -- instr.src_count = 1; -- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); -- break; -- } -- -- case HLSL_IR_JUMP_RETURN: -- vkd3d_unreachable(); -- -- default: -- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -- return; -- } -- -- write_sm4_instruction(tpf, &instr); --} -- --/* Does this variable's data come directly from the API user, rather than being -- * temporary or from a previous shader stage? -- * I.e. 
is it a uniform or VS input? */ --static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) --{ -- if (var->is_uniform) -- return true; -- -- return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; --} -- --static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) --{ -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- const struct hlsl_type *type = load->node.data_type; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- -- sm4_dst_from_node(&instr.dsts[0], &load->node); -- instr.dst_count = 1; -- -- VKD3D_ASSERT(hlsl_is_numeric_type(type)); -- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) -- { -- struct hlsl_constant_value value; -- -- /* Uniform bools can be specified as anything, but internal bools always -- * have 0 for false and ~0 for true. Normalize that here. */ -- -- instr.opcode = VKD3D_SM4_OP_MOVC; -- -- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); -- -- memset(&value, 0xff, sizeof(value)); -- sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); -- memset(&value, 0, sizeof(value)); -- sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); -- instr.src_count = 3; -- } -- else -- { -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); -- instr.src_count = 1; -- } -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_LOOP, -- }; -- -- write_sm4_instruction(tpf, &instr); -- -- write_sm4_block(tpf, &loop->body); -- -- instr.opcode = VKD3D_SM4_OP_ENDLOOP; -- write_sm4_instruction(tpf, &instr); --} -- --static void 
write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, -- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, -- const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) --{ -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- struct vkd3d_shader_src_param *src; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- -- instr.opcode = VKD3D_SM4_OP_GATHER4; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); -- -- if (texel_offset) -- { -- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) -- { -- if (!vkd3d_shader_ver_ge(version, 5, 0)) -- { -- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, -- "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -- return; -- } -- instr.opcode = VKD3D_SM5_OP_GATHER4_PO; -- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); -- } -- } -- -- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr); -- -- src = &instr.srcs[instr.src_count++]; -- sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); -- src->reg.dimension = VSIR_DIMENSION_VEC4; -- src->swizzle = swizzle; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) --{ -- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; -- const struct hlsl_ir_node *sample_index = load->sample_index.node; -- const struct hlsl_ir_node *coords = load->coords.node; -- -- if (load->sampler.var && !load->sampler.var->is_uniform) -- { -- hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); -- return; -- } -- -- if 
(!load->resource.var->is_uniform) -- { -- hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); -- return; -- } -- -- switch (load->load_type) -- { -- case HLSL_RESOURCE_LOAD: -- write_sm4_ld(tpf, &load->node, &load->resource, -- coords, sample_index, texel_offset, load->sampling_dim); -- break; -- -- case HLSL_RESOURCE_SAMPLE: -- case HLSL_RESOURCE_SAMPLE_CMP: -- case HLSL_RESOURCE_SAMPLE_CMP_LZ: -- case HLSL_RESOURCE_SAMPLE_LOD: -- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -- case HLSL_RESOURCE_SAMPLE_GRAD: -- /* Combined sample expressions were lowered. */ -- VKD3D_ASSERT(load->sampler.var); -- write_sm4_sample(tpf, load); -- break; -- -- case HLSL_RESOURCE_GATHER_RED: -- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -- VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_GREEN: -- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -- VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_BLUE: -- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -- VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset); -- break; -- -- case HLSL_RESOURCE_GATHER_ALPHA: -- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, -- VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset); -- break; -- -- case HLSL_RESOURCE_SAMPLE_INFO: -- write_sm4_sampleinfo(tpf, load); -- break; -- -- case HLSL_RESOURCE_RESINFO: -- write_sm4_resinfo(tpf, load); -- break; -- -- case HLSL_RESOURCE_SAMPLE_PROJ: -- vkd3d_unreachable(); -- } --} -- --static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) --{ -- struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); -- struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; -- struct sm4_instruction instr; -- -- if (!store->resource.var->is_uniform) -- { -- 
hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); -- return; -- } -- -- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -- { -- hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); -- return; -- } -- -- memset(&instr, 0, sizeof(instr)); -- -- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr); -- instr.dst_count = 1; -- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -- { -- instr.opcode = VKD3D_SM5_OP_STORE_RAW; -- instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx); -- } -- else -- { -- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; -- } -- -- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) --{ -- const struct hlsl_ir_node *rhs = store->rhs.node; -- struct sm4_instruction instr; -- uint32_t writemask; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); -- instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) --{ -- const struct hlsl_ir_node *selector = s->selector.node; -- struct hlsl_ir_switch_case *c; -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_SWITCH; -- -- sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 1; -- -- 
write_sm4_instruction(tpf, &instr); -- -- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -- { -- memset(&instr, 0, sizeof(instr)); -- if (c->is_default) -- { -- instr.opcode = VKD3D_SM4_OP_DEFAULT; -- } -- else -- { -- struct hlsl_constant_value value = { .u[0].u = c->value }; -- -- instr.opcode = VKD3D_SM4_OP_CASE; -- sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 1; -- } -- -- write_sm4_instruction(tpf, &instr); -- write_sm4_block(tpf, &c->body); -- } -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_ENDSWITCH; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) --{ -- unsigned int hlsl_swizzle; -- struct sm4_instruction instr; -- uint32_t writemask; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_MOV; -- -- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); -- instr.dst_count = 1; -- -- sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); -- hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), -- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); -- instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- - static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) - { -+ struct sm4_instruction_modifier *modifier; - const struct vkd3d_sm4_opcode_info *info; - struct sm4_instruction instr = {0}; - unsigned int dst_count, src_count; -@@ -5655,6 +4723,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - for (unsigned int i = 0; i < ins->src_count; ++i) - instr.srcs[i] = ins->src[i]; - -+ if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w) -+ { -+ VKD3D_ASSERT(instr.modifier_count < 
ARRAY_SIZE(instr.modifiers)); -+ modifier = &instr.modifiers[instr.modifier_count++]; -+ modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI; -+ modifier->u.aoffimmi.u = ins->texel_offset.u; -+ modifier->u.aoffimmi.v = ins->texel_offset.v; -+ modifier->u.aoffimmi.w = ins->texel_offset.w; -+ } -+ -+ if (info->is_conditional_op) -+ { -+ if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ) -+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; -+ } -+ - write_sm4_instruction(tpf, &instr); - } - -@@ -5704,6 +4788,11 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - - case VKD3DSIH_ADD: - case VKD3DSIH_AND: -+ case VKD3DSIH_BREAK: -+ case VKD3DSIH_CASE: -+ case VKD3DSIH_CONTINUE: -+ case VKD3DSIH_DEFAULT: -+ case VKD3DSIH_DISCARD: - case VKD3DSIH_DIV: - case VKD3DSIH_DP2: - case VKD3DSIH_DP3: -@@ -5714,6 +4803,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - case VKD3DSIH_DSY: - case VKD3DSIH_DSY_COARSE: - case VKD3DSIH_DSY_FINE: -+ case VKD3DSIH_ELSE: -+ case VKD3DSIH_ENDIF: -+ case VKD3DSIH_ENDLOOP: -+ case VKD3DSIH_ENDSWITCH: - case VKD3DSIH_EQO: - case VKD3DSIH_EXP: - case VKD3DSIH_F16TOF32: -@@ -5721,9 +4814,14 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - case VKD3DSIH_FRC: - case VKD3DSIH_FTOI: - case VKD3DSIH_FTOU: -+ case VKD3DSIH_GATHER4: -+ case VKD3DSIH_GATHER4_PO: - case VKD3DSIH_GEO: -+ case VKD3DSIH_HS_CONTROL_POINT_PHASE: -+ case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_IADD: - case VKD3DSIH_IEQ: -+ case VKD3DSIH_IF: - case VKD3DSIH_IGE: - case VKD3DSIH_ILT: - case VKD3DSIH_IMAD: -@@ -5735,7 +4833,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - case VKD3DSIH_ISHL: - case VKD3DSIH_ISHR: - case VKD3DSIH_ITOF: -+ case VKD3DSIH_LD: -+ case VKD3DSIH_LD2DMS: -+ case VKD3DSIH_LD_RAW: -+ case VKD3DSIH_LD_UAV_TYPED: - case VKD3DSIH_LOG: -+ case VKD3DSIH_LOOP: - case VKD3DSIH_LTO: - case VKD3DSIH_MAD: - case VKD3DSIH_MAX: -@@ 
-5747,14 +4850,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - case VKD3DSIH_NOT: - case VKD3DSIH_OR: - case VKD3DSIH_RCP: -+ case VKD3DSIH_RESINFO: -+ case VKD3DSIH_RET: - case VKD3DSIH_ROUND_NE: - case VKD3DSIH_ROUND_NI: - case VKD3DSIH_ROUND_PI: - case VKD3DSIH_ROUND_Z: - case VKD3DSIH_RSQ: -+ case VKD3DSIH_SAMPLE: -+ case VKD3DSIH_SAMPLE_B: -+ case VKD3DSIH_SAMPLE_C: -+ case VKD3DSIH_SAMPLE_C_LZ: -+ case VKD3DSIH_SAMPLE_GRAD: - case VKD3DSIH_SAMPLE_INFO: -+ case VKD3DSIH_SAMPLE_LOD: - case VKD3DSIH_SINCOS: - case VKD3DSIH_SQRT: -+ case VKD3DSIH_STORE_RAW: -+ case VKD3DSIH_STORE_UAV_TYPED: -+ case VKD3DSIH_SWITCH: - case VKD3DSIH_UDIV: - case VKD3DSIH_UGE: - case VKD3DSIH_ULT: -@@ -5772,90 +4886,15 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - } - } - --static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) -+static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program) - { -- const struct hlsl_ir_node *instr; -- unsigned int vsir_instr_idx; -- -- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -- { -- if (instr->data_type) -- { -- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -- { -- hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", -- instr->data_type->class); -- break; -- } -- -- if (!instr->reg.allocated) -- { -- VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); -- continue; -- } -- } -- -- switch (instr->type) -- { -- case HLSL_IR_CALL: -- case HLSL_IR_CONSTANT: -- vkd3d_unreachable(); -- -- case HLSL_IR_IF: -- write_sm4_if(tpf, hlsl_ir_if(instr)); -- break; -- -- case HLSL_IR_JUMP: -- write_sm4_jump(tpf, hlsl_ir_jump(instr)); -- break; -- -- case HLSL_IR_LOAD: -- write_sm4_load(tpf, hlsl_ir_load(instr)); -- break; -- -- case HLSL_IR_RESOURCE_LOAD: -- write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); -- 
break; -- -- case HLSL_IR_RESOURCE_STORE: -- write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); -- break; -- -- case HLSL_IR_LOOP: -- write_sm4_loop(tpf, hlsl_ir_loop(instr)); -- break; -- -- case HLSL_IR_STORE: -- write_sm4_store(tpf, hlsl_ir_store(instr)); -- break; -- -- case HLSL_IR_SWITCH: -- write_sm4_switch(tpf, hlsl_ir_switch(instr)); -- break; -- -- case HLSL_IR_SWIZZLE: -- write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); -- break; -- -- case HLSL_IR_VSIR_INSTRUCTION_REF: -- vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; -- tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); -- } -- } --} -+ unsigned int i; - --static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) --{ - if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) - tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); - -- write_sm4_block(tpf, &func->body); -- -- write_sm4_ret(tpf); -+ for (i = 0; i < program->instructions.count; ++i) -+ tpf_handle_instruction(tpf, &program->instructions.elements[i]); - } - - static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) -@@ -5944,16 +4983,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec - write_sm4_dcl_textures(tpf, resource, true); - } - -- if (version->type == VKD3D_SHADER_TYPE_HULL) -- tpf_write_hs_control_point_phase(tpf); -- -- tpf_write_shader_function(tpf, entry_func); -- -- if (version->type == VKD3D_SHADER_TYPE_HULL) -- { -- tpf_write_hs_fork_phase(tpf); -- tpf_write_shader_function(tpf, ctx->patch_constant_func); -- } -+ tpf_write_program(tpf, tpf->program); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index d751f2dc6bf..db61eec8f28 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 +23,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context - - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; - size_t combined_samplers_size; -+ -+ enum vkd3d_shader_tessellator_output_primitive output_primitive; -+ enum vkd3d_shader_tessellator_partitioning partitioning; - }; - - static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, -@@ -1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, - instruction->declaration.structured_resource.byte_stride, false, instruction->flags); - break; -+ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: -+ context->output_primitive = instruction->declaration.tessellator_output_primitive; -+ break; -+ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: -+ context->partitioning = instruction->declaration.tessellator_partitioning; -+ break; - case VKD3DSIH_IF: - case VKD3DSIH_IFC: - cf_info = vkd3d_shader_scan_push_cf_info(context); -@@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) - { - struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; -+ struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; - struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; - struct vkd3d_shader_scan_descriptor_info *descriptor_info; - struct vkd3d_shader_scan_signature_info *signature_info; -@@ -1530,6 +1542,8 @@ static int 
vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - descriptor_info1 = &local_descriptor_info1; - } - -+ tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); -+ - vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, - descriptor_info1, combined_sampler_info, message_context); - -@@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh - if (!ret && descriptor_info) - ret = convert_descriptor_info(descriptor_info, descriptor_info1); - -+ if (!ret && tessellation_info) -+ { -+ tessellation_info->output_primitive = context.output_primitive; -+ tessellation_info->partitioning = context.partitioning; -+ } -+ - if (ret < 0) - { - if (combined_sampler_info) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index be7c0b73a22..ad04972b3fb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1431,6 +1431,7 @@ struct vsir_program - uint8_t diffuse_written_mask; - enum vsir_control_flow_type cf_type; - enum vsir_normalisation_level normalisation_level; -+ enum vkd3d_tessellator_domain tess_domain; - - const char **block_names; - size_t block_name_count; --- -2.45.2 - diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch new file mode 100644 index 00000000..04e0470c --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-a68fd1b0ded735580b0ec9025f75fe02d62.patch @@ -0,0 +1,14861 @@ +From c2e60d4422dcf282cc05b674c4f5326929b49326 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 29 Nov 2024 07:14:57 +1100 +Subject: [PATCH] Updated vkd3d to a68fd1b0ded735580b0ec9025f75fe02d62966df. 
+ +--- + libs/vkd3d/include/private/vkd3d_common.h | 2 +- + libs/vkd3d/include/vkd3d_shader.h | 219 ++ + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 34 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 908 ++---- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 + + libs/vkd3d/libs/vkd3d-shader/dxil.c | 36 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 163 +- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 9 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 289 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 143 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 404 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2614 ++++++++++++++--- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 149 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 1987 ++++++++++--- + libs/vkd3d/libs/vkd3d-shader/msl.c | 465 ++- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + + libs/vkd3d/libs/vkd3d-shader/spirv.c | 709 +++-- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 1561 ++-------- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 28 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 45 +- + libs/vkd3d/libs/vkd3d/command.c | 273 +- + libs/vkd3d/libs/vkd3d/device.c | 58 +- + libs/vkd3d/libs/vkd3d/state.c | 237 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 63 +- + 26 files changed, 6880 insertions(+), 3556 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index ec1dd70c9b2..fd62730f948 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index f95caa2f825..af55d63a5c8 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h 
++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -112,6 +112,11 @@ enum vkd3d_shader_structure_type + * \since 1.13 + */ + VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, ++ /** ++ * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -471,6 +476,109 @@ enum vkd3d_shader_binding_flag + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), + }; + ++/** ++ * The factor used to interpolate the fragment output colour with fog. ++ * ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for specification of the ++ * interpolation factor as defined here. ++ * ++ * The following variables may be used to determine the interpolation factor: ++ * ++ * c = The fog coordinate value output from the vertex shader. This is an ++ * inter-stage varying with the semantic name "FOG" and semantic index 0. ++ * It may be modified by VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE. ++ * E = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_END. ++ * k = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE. ++ * ++ * \since 1.15 ++ */ ++enum vkd3d_shader_fog_fragment_mode ++{ ++ /** ++ * No fog interpolation is applied; ++ * the output colour is passed through unmodified. ++ * Equivalently, the fog interpolation factor is 1. ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_NONE = 0x0, ++ /** ++ * The fog interpolation factor is 2^-(k * c). ++ * ++ * In order to implement traditional exponential fog, as present in ++ * Direct3D and OpenGL, i.e. ++ * ++ * e^-(density * c) ++ * ++ * set ++ * ++ * k = density * log₂(e) ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_EXP = 0x1, ++ /** ++ * The fog interpolation factor is 2^-((k * c)²). ++ * ++ * In order to implement traditional square-exponential fog, as present in ++ * Direct3D and OpenGL, i.e.
++ * ++ * e^-((density * c)²) ++ * ++ * set ++ * ++ * k = density * √log₂(e) ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_EXP2 = 0x2, ++ /** ++ * The fog interpolation factor is (E - c) * k. ++ * ++ * In order to implement traditional linear fog, as present in Direct3D and ++ * OpenGL, i.e. ++ * ++ * (end - c) / (end - start) ++ * ++ * set ++ * ++ * E = end ++ * k = 1 / (end - start) ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_LINEAR = 0x3, ++}; ++ ++/** ++ * The source of the fog varying output by a pre-rasterization shader. ++ * The fog varying is defined as the output varying with the semantic name "FOG" ++ * and semantic index 0. ++ * ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE for further documentation of this ++ * parameter. ++ * ++ * \since 1.15 ++ */ ++enum vkd3d_shader_fog_source ++{ ++ /** ++ * The source shader is not modified. That is, the fog varying in the target ++ * shader is the original fog varying if and only if present. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_FOG = 0x0, ++ /** ++ * If the source shader has a fog varying, it is not modified. ++ * Otherwise, if the source shader outputs a varying with semantic name ++ * "COLOR" and semantic index 1 whose index includes a W component, ++ * said W component is output as fog varying. ++ * Otherwise, no fog varying is output. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W = 0x1, ++ /** ++ * The fog source is the Z component of the position output by the vertex ++ * shader. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_Z = 0x2, ++ /** ++ * The fog source is the W component of the position output by the vertex ++ * shader. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_W = 0x3, ++}; ++ + /** + * The manner in which a parameter value is provided to the shader, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. +@@ -734,6 +842,97 @@ enum vkd3d_shader_parameter_name + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, ++ /** ++ * Fog mode used in fragment shaders.
++ * ++ * The value specified by this parameter must be a member of ++ * enum vkd3d_shader_fog_fragment_mode. ++ * ++ * If not VKD3D_SHADER_FOG_FRAGMENT_NONE, the pixel shader colour output at ++ * location 0 is linearly interpolated with the fog colour defined by ++ * VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR. The interpolation factor is ++ * defined according to the enumerant selected by this parameter. ++ * The interpolated value is then outputted instead of the original value at ++ * location 0. ++ * ++ * An interpolation factor of 0 specifies to use the fog colour; a factor of ++ * 1 specifies to use the original colour output. The interpolation factor ++ * is clamped to the [0, 1] range before interpolating. ++ * ++ * The default value is VKD3D_SHADER_FOG_FRAGMENT_NONE. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE, ++ /** ++ * Fog colour. ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of ++ * fog. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. ++ * ++ * The default value is transparent black, i.e. the vector {0, 0, 0, 0}. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, ++ /** ++ * End coordinate for linear fog. ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of ++ * fog. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. ++ * ++ * The default value is 1.0. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_END, ++ /** ++ * Scale value for fog. ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of ++ * fog. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. 
++ * ++ * The default value is 1.0. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, ++ /** ++ * Fog source. The value specified by this parameter must be a member of ++ * enum vkd3d_shader_fog_source. ++ * ++ * This parameter replaces or suppletes the fog varying output by a ++ * pre-rasterization shader. The fog varying is defined as the output ++ * varying with the semantic name "FOG" and semantic index 0. ++ * ++ * Together with other fog parameters, this parameter can be used to ++ * implement fixed function fog, as present in Direct3D versions up to 9, ++ * if the target environment does not support fog as part of its own ++ * fixed-function API (as Vulkan and core OpenGL). ++ * ++ * The default value is VKD3D_SHADER_FOG_SOURCE_FOG. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), + }; +@@ -2040,6 +2239,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info + unsigned int combined_sampler_count; + }; + ++/** ++ * A chained structure describing the tessellation information in a hull shader. ++ * ++ * This structure extends vkd3d_shader_compile_info. ++ * ++ * \since 1.15 ++ */ ++struct vkd3d_shader_scan_hull_shader_tessellation_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** The tessellation output primitive. */ ++ enum vkd3d_shader_tessellator_output_primitive output_primitive; ++ /** The tessellation partitioning mode. 
*/ ++ enum vkd3d_shader_tessellator_partitioning partitioning; ++}; ++ + /** + * Data type of a shader varying, returned as part of struct + * vkd3d_shader_signature_element. +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index f60ef7db769..c2c6ad67804 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -20,6 +20,7 @@ + #define WIDL_C_INLINE_WRAPPERS + #endif + #define COBJMACROS ++ + #define CONST_VTABLE + #include "vkd3d.h" + #include "vkd3d_blob.h" +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 7c5444f63a3..8c96befadea 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_BFREV ] = "bfrev", + [VKD3DSIH_BRANCH ] = "branch", + [VKD3DSIH_BREAK ] = "break", +- [VKD3DSIH_BREAKC ] = "breakc", ++ [VKD3DSIH_BREAKC ] = "break", + [VKD3DSIH_BREAKP ] = "breakp", + [VKD3DSIH_BUFINFO ] = "bufinfo", + [VKD3DSIH_CALL ] = "call", +@@ -183,7 +183,7 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_IDIV ] = "idiv", + [VKD3DSIH_IEQ ] = "ieq", + [VKD3DSIH_IF ] = "if", +- [VKD3DSIH_IFC ] = "ifc", ++ [VKD3DSIH_IFC ] = "if", + [VKD3DSIH_IGE ] = "ige", + [VKD3DSIH_ILT ] = "ilt", + [VKD3DSIH_IMAD ] = "imad", +@@ -815,7 +815,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, + usage = "tessfactor"; + break; + case VKD3D_DECL_USAGE_POSITIONT: +- usage = "positionT"; ++ usage = "positiont"; + indexed = true; + break; + case VKD3D_DECL_USAGE_FOG: +@@ -2547,6 +2547,33 @@ static void trace_signature(const struct shader_signature *signature, const char + vkd3d_string_buffer_cleanup(&buffer); + } + ++static void trace_io_declarations(const struct vsir_program *program) ++{ ++ struct vkd3d_string_buffer buffer; ++ bool empty = true; ++ unsigned int i; ++ ++ 
vkd3d_string_buffer_init(&buffer); ++ ++ vkd3d_string_buffer_printf(&buffer, "Input/output declarations:"); ++ ++ for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) ++ { ++ if (bitmap_is_set(program->io_dcls, i)) ++ { ++ empty = false; ++ vkd3d_string_buffer_printf(&buffer, " %u", i); ++ } ++ } ++ ++ if (empty) ++ vkd3d_string_buffer_printf(&buffer, " empty"); ++ ++ TRACE("%s\n", buffer.buffer); ++ ++ vkd3d_string_buffer_cleanup(&buffer); ++} ++ + void vsir_program_trace(const struct vsir_program *program) + { + const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; +@@ -2556,6 +2583,7 @@ void vsir_program_trace(const struct vsir_program *program) + trace_signature(&program->input_signature, "Input"); + trace_signature(&program->output_signature, "Output"); + trace_signature(&program->patch_constant_signature, "Patch-constant"); ++ trace_io_declarations(program); + + if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) + return; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 9e2eacbcfa6..a931883e8d1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -8,7 +8,7 @@ + * Copyright 2006 Ivan Gyurdiev + * Copyright 2007-2008 Stefan Dösinger for CodeWeavers + * Copyright 2009, 2021 Henri Verbeet for CodeWeavers +- * Copyright 2019-2020 Zebediah Figura for CodeWeavers ++ * Copyright 2019-2020, 2023-2024 Elizabeth Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public +@@ -25,7 +25,7 @@ + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +-#include "hlsl.h" ++#include "vkd3d_shader_private.h" + + #define VKD3D_SM1_VS 0xfffeu + #define VKD3D_SM1_PS 0xffffu +@@ -235,7 +235,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = + /* Arithmetic */ + {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, + 
{VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, +- {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}}, ++ {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}}, + {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, + {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, + {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, +@@ -248,22 +248,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = + {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, + {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, + {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, +- {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, ++ {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, + {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, + {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, + {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, + {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, + {VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT}, + {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, +- {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, ++ {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP, {2, 0}}, + {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, +- {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, +- {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, +- {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}}, +- {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,}, +- {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, +- {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, ++ {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, ++ {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, ++ {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, {2, 1}}, ++ {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}}, ++ {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, ++ {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, ++ {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, + /* Matrix */ + {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, + {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, +@@ -274,27 +274,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = + {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, + /* Constant 
definitions */ + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, +- {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +- {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, ++ {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, ++ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 0}}, + /* Control flow */ +- {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, +- {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}}, +- +- {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, +- {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, ++ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}}, ++ {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}}, ++ {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}}, ++ {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, ++ {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}}, ++ {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}}, ++ {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, ++ {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, ++ {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, ++ {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}}, ++ {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}}, ++ {VKD3D_SM1_OP_LOOP, 0, 2, 
VKD3DSIH_LOOP, {2, 0}}, ++ {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}}, ++ {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}}, ++ {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}}, ++ ++ {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, ++ {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, + {0, 0, 0, VKD3DSIH_INVALID}, + }; + +@@ -307,89 +307,84 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = + {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, + {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, + {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, +- {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP}, +- {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ}, ++ {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP, {2, 0}}, ++ {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ, {2, 0}}, + {VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3}, +- {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4}, +- {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN}, +- {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, +- {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, +- {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, +- {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, +- {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, +- {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, +- {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, +- {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, +- {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, ++ {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4, {1, 2}}, ++ {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN, {2, 0}}, ++ {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX, {2, 0}}, ++ {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, ++ {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP, {2, 0}}, ++ {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG, {2, 0}}, + {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, +- {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, +- {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}}, +- {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}}, +- {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, +- {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, +- {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM}, +- {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, +- {VKD3D_SM1_OP_SINCOS, 1, 1, 
VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}}, ++ {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC, {2, 0}}, ++ {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, {1, 4}}, ++ {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}}, ++ {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, ++ {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, ++ {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, ++ {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, ++ {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, ++ {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}}, + /* Matrix */ +- {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, +- {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, +- {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4}, +- {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, +- {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, ++ {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4, {2, 0}}, ++ {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3, {2, 0}}, ++ {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4, {2, 0}}, ++ {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3, {2, 0}}, ++ {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2, {2, 0}}, + /* Declarations */ +- {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, ++ {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL, {2, 0}}, + /* Constant definitions */ + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, +- {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, +- {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, ++ {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, ++ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 1}}, + /* Control flow */ +- {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_BREAKC, 0, 2, 
VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, +- {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}}, ++ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}}, ++ {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}}, ++ {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}}, ++ {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, ++ {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}}, ++ {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}}, ++ {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, ++ {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, ++ {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, ++ {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}}, ++ {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}}, ++ {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}}, ++ {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}}, ++ {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}}, ++ {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}}, + /* Texture */ +- {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}}, +- {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}}, +- {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}}, +- {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}}, +- 
{VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}}, +- {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}}, +- {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}}, +- {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}}, +- {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, +- {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, +- {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE}, ++ {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1, 4}, {1, 4}}, ++ {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1, 0}}, ++ {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, {1, 4}}, ++ {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}}, ++ {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, 
{1, 3}}, ++ {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, {0, 0}}, ++ {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, {1, 3}}, ++ {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, {1, 4}}, ++ {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, {1, 4}}, ++ {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}}, ++ {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}}, ++ {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}}, ++ {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, ++ {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, ++ {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE, {1, 4}, {1, 4}}, + {0, 0, 0, VKD3DSIH_INVALID}, + }; + +@@ -638,7 +633,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + return; + } + ++ /* Normally VSIR mandates that the register mask is a subset of the usage ++ * mask, and the usage mask is a subset of the signature mask. This ++ * doesn't always happen with SM1-3 registers, because of the limited ++ * flexibility with expressing swizzles. ++ * ++ * For example it's easy to find shaders like this: ++ * ps_3_0 ++ * [...] ++ * dcl_texcoord0 v0 ++ * [...] ++ * texld r2.xyzw, v0.xyzw, s1.xyzw ++ * [...]
++ * ++ * The dcl_texcoord0 instruction secretly has a .xy mask, which is used to ++ * compute the signature mask, but the texld instruction apparently uses all ++ * the components. Of course the last two components are ignored, but ++ * formally they seem to be used. So we end up with a signature element with ++ * mask .xy and usage mask .xyzw. ++ * ++ * In order to avoid this problem, when generating VSIR code with SM4 ++ * normalisation level we remove the unused components in the write mask. We ++ * don't do that when targeting the SM1 normalisation level (i.e., when ++ * disassembling) so as to generate the same disassembly code as native. */ + element->used_mask |= mask; ++ if (program->normalisation_level >= VSIR_NORMALISED_SM4) ++ element->used_mask &= element->mask; + } + + static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, +@@ -968,6 +988,8 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const + + if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + sm1->p.program->has_point_size = true; ++ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG) ++ sm1->p.program->has_fog = true; + } + + static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, +@@ -1268,6 +1290,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + { + const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; ++ enum vsir_normalisation_level normalisation_level; + const uint32_t *code = compile_info->source.code; + size_t code_size = compile_info->source.size; + struct vkd3d_shader_version version; +@@ -1318,9 +1341,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st + sm1->start = &code[1]; + sm1->end =
&code[token_count]; + ++ normalisation_level = VSIR_NORMALISED_SM1; ++ if (compile_info->target_type != VKD3D_SHADER_TARGET_D3D_ASM) ++ normalisation_level = VSIR_NORMALISED_SM4; ++ + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vsir_program_init(program, compile_info, &version, +- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) ++ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); +@@ -1525,387 +1552,73 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns + type == VKD3D_SHADER_TYPE_VERTEX ? VKD3D_SM1_VS : VKD3D_SM1_PS); + } + +-D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) ++static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( ++ struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) + { +- switch (type->class) +- { +- case HLSL_CLASS_ARRAY: +- return hlsl_sm1_class(type->e.array.type); +- case HLSL_CLASS_MATRIX: +- VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); +- if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) +- return D3DXPC_MATRIX_COLUMNS; +- else +- return D3DXPC_MATRIX_ROWS; +- case HLSL_CLASS_SCALAR: +- return D3DXPC_SCALAR; +- case HLSL_CLASS_STRUCT: +- return D3DXPC_STRUCT; +- case HLSL_CLASS_VECTOR: +- return D3DXPC_VECTOR; +- case HLSL_CLASS_PIXEL_SHADER: +- case HLSL_CLASS_SAMPLER: +- case HLSL_CLASS_STRING: +- case HLSL_CLASS_TEXTURE: +- case HLSL_CLASS_VERTEX_SHADER: +- return D3DXPC_OBJECT; +- case HLSL_CLASS_DEPTH_STENCIL_STATE: +- case HLSL_CLASS_DEPTH_STENCIL_VIEW: +- case HLSL_CLASS_EFFECT_GROUP: +- case HLSL_CLASS_ERROR: +- case HLSL_CLASS_PASS: +- case HLSL_CLASS_RASTERIZER_STATE: +- case HLSL_CLASS_RENDER_TARGET_VIEW: +- case HLSL_CLASS_TECHNIQUE: +- case HLSL_CLASS_UAV: +- case 
HLSL_CLASS_VOID: +- case HLSL_CLASS_CONSTANT_BUFFER: +- case HLSL_CLASS_COMPUTE_SHADER: +- case HLSL_CLASS_DOMAIN_SHADER: +- case HLSL_CLASS_HULL_SHADER: +- case HLSL_CLASS_GEOMETRY_SHADER: +- case HLSL_CLASS_BLEND_STATE: +- case HLSL_CLASS_NULL: +- break; +- } +- +- vkd3d_unreachable(); +-} ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ const struct vkd3d_sm1_opcode_info *info; ++ unsigned int i = 0; + +-D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) +-{ +- switch (type->class) ++ for (;;) + { +- case HLSL_CLASS_SCALAR: +- case HLSL_CLASS_VECTOR: +- case HLSL_CLASS_MATRIX: +- switch (type->e.numeric.type) +- { +- case HLSL_TYPE_BOOL: +- return D3DXPT_BOOL; +- /* Actually double behaves differently depending on DLL version: +- * For <= 36, it maps to D3DXPT_FLOAT. +- * For 37-40, it maps to zero (D3DXPT_VOID). +- * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* +- * values are mostly compatible with D3DXPT_*). +- * However, the latter two cases look like bugs, and a reasonable +- * application certainly wouldn't know what to do with them. +- * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. 
*/ +- case HLSL_TYPE_DOUBLE: +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_HALF: +- return D3DXPT_FLOAT; +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return D3DXPT_INT; +- default: +- vkd3d_unreachable(); +- } +- +- case HLSL_CLASS_SAMPLER: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3DXPT_SAMPLER1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3DXPT_SAMPLER2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3DXPT_SAMPLER3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3DXPT_SAMPLERCUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3DXPT_SAMPLER; +- default: +- ERR("Invalid dimension %#x.\n", type->sampler_dim); +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_CLASS_TEXTURE: +- switch (type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_1D: +- return D3DXPT_TEXTURE1D; +- case HLSL_SAMPLER_DIM_2D: +- return D3DXPT_TEXTURE2D; +- case HLSL_SAMPLER_DIM_3D: +- return D3DXPT_TEXTURE3D; +- case HLSL_SAMPLER_DIM_CUBE: +- return D3DXPT_TEXTURECUBE; +- case HLSL_SAMPLER_DIM_GENERIC: +- return D3DXPT_TEXTURE; +- default: +- ERR("Invalid dimension %#x.\n", type->sampler_dim); +- vkd3d_unreachable(); +- } +- break; ++ info = &d3dbc->opcode_table[i++]; ++ if (info->vkd3d_opcode == VKD3DSIH_INVALID) ++ return NULL; + +- case HLSL_CLASS_ARRAY: +- return hlsl_sm1_base_type(type->e.array.type); +- +- case HLSL_CLASS_STRUCT: +- return D3DXPT_VOID; +- +- case HLSL_CLASS_STRING: +- return D3DXPT_STRING; +- +- case HLSL_CLASS_PIXEL_SHADER: +- return D3DXPT_PIXELSHADER; +- +- case HLSL_CLASS_VERTEX_SHADER: +- return D3DXPT_VERTEXSHADER; +- +- case HLSL_CLASS_DEPTH_STENCIL_STATE: +- case HLSL_CLASS_DEPTH_STENCIL_VIEW: +- case HLSL_CLASS_EFFECT_GROUP: +- case HLSL_CLASS_ERROR: +- case HLSL_CLASS_PASS: +- case HLSL_CLASS_RASTERIZER_STATE: +- case HLSL_CLASS_RENDER_TARGET_VIEW: +- case HLSL_CLASS_TECHNIQUE: +- case HLSL_CLASS_UAV: +- case HLSL_CLASS_VOID: +- case HLSL_CLASS_CONSTANT_BUFFER: +- case HLSL_CLASS_COMPUTE_SHADER: +- case HLSL_CLASS_DOMAIN_SHADER: +- case 
HLSL_CLASS_HULL_SHADER: +- case HLSL_CLASS_GEOMETRY_SHADER: +- case HLSL_CLASS_BLEND_STATE: +- case HLSL_CLASS_NULL: +- break; ++ if (vkd3d_opcode == info->vkd3d_opcode ++ && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) ++ && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) ++ || !info->max_version.major)) ++ return info; + } +- +- vkd3d_unreachable(); + } + +-static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) ++static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( ++ struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) + { +- const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); +- unsigned int array_size = hlsl_get_multiarray_size(type); +- unsigned int field_count = 0; +- size_t fields_offset = 0; +- size_t i; +- +- if (type->bytecode_offset) +- return; ++ const struct vkd3d_sm1_opcode_info *info; + +- if (array_type->class == HLSL_CLASS_STRUCT) ++ if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) + { +- field_count = array_type->e.record.field_count; +- +- for (i = 0; i < field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- field->name_bytecode_offset = put_string(buffer, field->name); +- write_sm1_type(buffer, field->type, ctab_start); +- } +- +- fields_offset = bytecode_align(buffer) - ctab_start; +- +- for (i = 0; i < field_count; ++i) +- { +- struct hlsl_struct_field *field = &array_type->e.record.fields[i]; +- +- put_u32(buffer, field->name_bytecode_offset - ctab_start); +- put_u32(buffer, field->type->bytecode_offset - ctab_start); +- } ++ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, ++ "Opcode %#x not supported for shader profile.", ins->opcode); ++ d3dbc->failed = true; ++ return NULL; + } + +- type->bytecode_offset = 
put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); +- put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); +- put_u32(buffer, vkd3d_make_u32(array_size, field_count)); +- put_u32(buffer, fields_offset); +-} +- +-static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) +-{ +- struct hlsl_ir_var *var; +- +- list_remove(&to_sort->extern_entry); +- +- LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) ++ if (ins->dst_count != info->dst_count) + { +- if (strcmp(to_sort->name, var->name) < 0) +- { +- list_add_before(&var->extern_entry, &to_sort->extern_entry); +- return; +- } ++ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, ++ "Invalid destination count %u for vsir instruction %#x (expected %u).", ++ ins->dst_count, ins->opcode, info->dst_count); ++ d3dbc->failed = true; ++ return NULL; + } +- +- list_add_tail(sorted, &to_sort->extern_entry); +-} +- +-static void sm1_sort_externs(struct hlsl_ctx *ctx) +-{ +- struct list sorted = LIST_INIT(sorted); +- struct hlsl_ir_var *var, *next; +- +- LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ if (ins->src_count != info->src_count) + { +- if (var->is_uniform) +- sm1_sort_extern(&sorted, var); ++ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, ++ "Invalid source count %u for vsir instruction %#x (expected %u).", ++ ins->src_count, ins->opcode, info->src_count); ++ d3dbc->failed = true; ++ return NULL; + } +- list_move_tail(&ctx->extern_vars, &sorted); ++ ++ return info; + } + +-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_comment(struct d3dbc_compiler *d3dbc, ++ uint32_t tag, const struct vkd3d_shader_code *comment) + { +- size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset; +- 
unsigned int uniform_count = 0; +- struct hlsl_ir_var *var; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- unsigned int r; +- +- for (r = 0; r <= HLSL_REGSET_LAST; ++r) +- { +- if (var->semantic.name || !var->regs[r].allocated || !var->last_read) +- continue; +- +- ++uniform_count; +- +- if (var->is_param && var->is_uniform) +- { +- char *new_name; +- +- if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) +- return; +- vkd3d_free((char *)var->name); +- var->name = new_name; +- } +- } +- } +- +- sm1_sort_externs(ctx); +- +- size_offset = put_u32(buffer, 0); +- ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); +- +- ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */ +- creator_offset = put_u32(buffer, 0); +- put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); +- put_u32(buffer, uniform_count); +- vars_offset = put_u32(buffer, 0); +- put_u32(buffer, 0); /* FIXME: flags */ +- put_u32(buffer, 0); /* FIXME: target string */ +- +- vars_start = bytecode_align(buffer); +- set_u32(buffer, vars_offset, vars_start - ctab_start); +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- unsigned int r; +- +- for (r = 0; r <= HLSL_REGSET_LAST; ++r) +- { +- if (var->semantic.name || !var->regs[r].allocated || !var->last_read) +- continue; +- +- put_u32(buffer, 0); /* name */ +- if (r == HLSL_REGSET_NUMERIC) +- { +- put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); +- put_u32(buffer, var->bind_count[r]); +- } +- else +- { +- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); +- put_u32(buffer, var->bind_count[r]); +- } +- put_u32(buffer, 0); /* type */ +- put_u32(buffer, 0); /* default value */ +- } +- } +- +- uniform_count = 0; +- +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +- { +- unsigned int r; +- +- for (r = 0; r <= 
HLSL_REGSET_LAST; ++r) +- { +- size_t var_offset, name_offset; +- +- if (var->semantic.name || !var->regs[r].allocated || !var->last_read) +- continue; +- +- var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); +- +- name_offset = put_string(buffer, var->name); +- set_u32(buffer, var_offset, name_offset - ctab_start); +- +- write_sm1_type(buffer, var->data_type, ctab_start); +- set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); +- +- if (var->default_values) +- { +- unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; +- unsigned int comp_count = hlsl_type_component_count(var->data_type); +- unsigned int default_value_offset; +- unsigned int k; +- +- default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); +- set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); +- +- for (k = 0; k < comp_count; ++k) +- { +- struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); +- unsigned int comp_offset; +- enum hlsl_regset regset; +- +- comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); +- if (regset == HLSL_REGSET_NUMERIC) +- { +- union +- { +- uint32_t u; +- float f; +- } uni; +- +- switch (comp_type->e.numeric.type) +- { +- case HLSL_TYPE_DOUBLE: +- if (ctx->double_as_float_alias) +- uni.u = var->default_values[k].number.u; +- else +- uni.u = 0; +- break; +- +- case HLSL_TYPE_INT: +- uni.f = var->default_values[k].number.i; +- break; +- +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: +- uni.f = var->default_values[k].number.u; +- break; +- +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- uni.u = var->default_values[k].number.u; +- break; +- +- default: +- vkd3d_unreachable(); +- } +- +- set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); +- } +- } +- } ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; ++ size_t offset, start, end; + +- 
++uniform_count; +- } +- } ++ offset = put_u32(buffer, 0); + +- offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); +- set_u32(buffer, creator_offset, offset - ctab_start); ++ start = put_u32(buffer, tag); ++ bytecode_put_bytes(buffer, comment->code, comment->size); ++ end = bytecode_align(buffer); + +- ctab_end = bytecode_align(buffer); +- set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); ++ set_u32(buffer, offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (end - start) / sizeof(uint32_t))); + } + + static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) +@@ -1914,166 +1627,108 @@ static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) + | ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT2) & VKD3D_SM1_REGISTER_TYPE_MASK2); + } + +-struct sm1_instruction ++static uint32_t swizzle_from_vsir(uint32_t swizzle) + { +- enum vkd3d_sm1_opcode opcode; +- unsigned int flags; +- +- struct sm1_dst_register +- { +- enum vkd3d_shader_register_type type; +- enum vkd3d_shader_dst_modifier mod; +- unsigned int writemask; +- uint32_t reg; +- } dst; ++ uint32_t x = vsir_swizzle_get_component(swizzle, 0); ++ uint32_t y = vsir_swizzle_get_component(swizzle, 1); ++ uint32_t z = vsir_swizzle_get_component(swizzle, 2); ++ uint32_t w = vsir_swizzle_get_component(swizzle, 3); + +- struct sm1_src_register +- { +- enum vkd3d_shader_register_type type; +- enum vkd3d_shader_src_modifier mod; +- unsigned int swizzle; +- uint32_t reg; +- } srcs[4]; +- unsigned int src_count; ++ if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) ++ ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); + +- unsigned int has_dst; +-}; ++ return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) ++ | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) ++ | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) ++ | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); ++} + +-static bool 
is_inconsequential_instr(const struct sm1_instruction *instr) ++static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) + { +- const struct sm1_src_register *src = &instr->srcs[0]; +- const struct sm1_dst_register *dst = &instr->dst; ++ const struct vkd3d_shader_dst_param *dst = &ins->dst[0]; ++ const struct vkd3d_shader_src_param *src = &ins->src[0]; + unsigned int i; + +- if (instr->opcode != VKD3D_SM1_OP_MOV) ++ if (ins->opcode != VKD3DSIH_MOV) + return false; +- if (dst->mod != VKD3DSPDM_NONE) ++ if (dst->modifiers != VKD3DSPDM_NONE) + return false; +- if (src->mod != VKD3DSPSM_NONE) ++ if (src->modifiers != VKD3DSPSM_NONE) + return false; +- if (src->type != dst->type) ++ if (src->reg.type != dst->reg.type) + return false; +- if (src->reg != dst->reg) ++ if (src->reg.idx[0].offset != dst->reg.idx[0].offset) + return false; + + for (i = 0; i < 4; ++i) + { +- if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) ++ if ((dst->write_mask & (1u << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) + return false; + } + + return true; + } + +-static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) ++static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) + { +- VKD3D_ASSERT(reg->writemask); ++ VKD3D_ASSERT(reg->write_mask); + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER +- | sm1_encode_register_type(reg->type) +- | (reg->mod << VKD3D_SM1_DST_MODIFIER_SHIFT) +- | (reg->writemask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg); ++ | sm1_encode_register_type(reg->reg.type) ++ | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) ++ | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) | reg->reg.idx[0].offset); + } + +-static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, +- const struct sm1_src_register *reg) ++static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, 
const struct vkd3d_shader_src_param *reg) + { + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER +- | sm1_encode_register_type(reg->type) +- | (reg->mod << VKD3D_SM1_SRC_MODIFIER_SHIFT) +- | (reg->swizzle << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg); ++ | sm1_encode_register_type(reg->reg.type) ++ | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) ++ | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) | reg->reg.idx[0].offset); + } + +-static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) ++static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) + { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; +- uint32_t token = instr->opcode; ++ const struct vkd3d_sm1_opcode_info *info; + unsigned int i; ++ uint32_t token; ++ ++ if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) ++ return; + +- if (is_inconsequential_instr(instr)) ++ if (is_inconsequential_instr(ins)) + return; + +- token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); ++ token = info->sm1_opcode; ++ token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (ins->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + + if (version->major > 1) +- token |= (instr->has_dst + instr->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; ++ token |= (ins->dst_count + ins->src_count) << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT; + put_u32(buffer, token); + +- if (instr->has_dst) +- write_sm1_dst_register(buffer, &instr->dst); +- +- for (i = 0; i < instr->src_count; ++i) +- write_sm1_src_register(buffer, &instr->srcs[i]); +-}; +- +-static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( +- struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) +-{ +- const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; +- const struct 
vkd3d_sm1_opcode_info *info; +- unsigned int i = 0; +- +- for (;;) +- { +- info = &d3dbc->opcode_table[i++]; +- if (info->vkd3d_opcode == VKD3DSIH_INVALID) +- return NULL; +- +- if (vkd3d_opcode == info->vkd3d_opcode +- && vkd3d_shader_ver_ge(version, info->min_version.major, info->min_version.minor) +- && (vkd3d_shader_ver_le(version, info->max_version.major, info->max_version.minor) +- || !info->max_version.major)) +- return info; +- } +-} +- +-static uint32_t swizzle_from_vsir(uint32_t swizzle) +-{ +- uint32_t x = vsir_swizzle_get_component(swizzle, 0); +- uint32_t y = vsir_swizzle_get_component(swizzle, 1); +- uint32_t z = vsir_swizzle_get_component(swizzle, 2); +- uint32_t w = vsir_swizzle_get_component(swizzle, 3); +- +- if (x & ~0x3u || y & ~0x3u || z & ~0x3u || w & ~0x3u) +- ERR("Unexpected vsir swizzle: 0x%08x.\n", swizzle); +- +- return ((x & 0x3u) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(0)) +- | ((y & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(1)) +- | ((z & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(2)) +- | ((w & 0x3) << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(3)); +-} +- +-static void sm1_src_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_src_param *param, +- struct sm1_src_register *src, const struct vkd3d_shader_location *loc) +-{ +- src->mod = param->modifiers; +- src->reg = param->reg.idx[0].offset; +- src->type = param->reg.type; +- src->swizzle = swizzle_from_vsir(param->swizzle); +- +- if (param->reg.idx[0].rel_addr) ++ for (i = 0; i < ins->dst_count; ++i) + { +- vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, +- "Unhandled relative addressing on source register."); +- d3dbc->failed = true; ++ if (ins->dst[i].reg.idx[0].rel_addr) ++ { ++ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, ++ "Unhandled relative addressing on destination register."); ++ d3dbc->failed = true; ++ } ++ write_sm1_dst_register(buffer, &ins->dst[i]); + } +-} +- 
+-static void sm1_dst_reg_from_vsir(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_dst_param *param, +- struct sm1_dst_register *dst, const struct vkd3d_shader_location *loc) +-{ +- dst->mod = param->modifiers; +- dst->reg = param->reg.idx[0].offset; +- dst->type = param->reg.type; +- dst->writemask = param->write_mask; + +- if (param->reg.idx[0].rel_addr) ++ for (i = 0; i < ins->src_count; ++i) + { +- vkd3d_shader_error(d3dbc->message_context, loc, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, +- "Unhandled relative addressing on destination register."); +- d3dbc->failed = true; ++ if (ins->src[i].reg.idx[0].rel_addr) ++ { ++ vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, ++ "Unhandled relative addressing on source register."); ++ d3dbc->failed = true; ++ } ++ write_sm1_src_register(buffer, &ins->src[i]); + } +-} ++}; + + static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) + { +@@ -2081,11 +1736,11 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + uint32_t token; + +- const struct sm1_dst_register reg = ++ const struct vkd3d_shader_dst_param reg = + { +- .type = VKD3DSPR_CONST, +- .writemask = VKD3DSP_WRITEMASK_ALL, +- .reg = ins->dst[0].reg.idx[0].offset, ++ .reg.type = VKD3DSPR_CONST, ++ .write_mask = VKD3DSP_WRITEMASK_ALL, ++ .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, + }; + + token = VKD3D_SM1_OP_DEF; +@@ -2103,7 +1758,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, + { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; +- struct sm1_dst_register reg = {0}; ++ struct vkd3d_shader_dst_param reg = {0}; + uint32_t token; + + token = VKD3D_SM1_OP_DCL; +@@ -2115,9 +1770,9 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler 
*d3dbc, + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + +- reg.type = VKD3DSPR_COMBINED_SAMPLER; +- reg.writemask = VKD3DSP_WRITEMASK_ALL; +- reg.reg = reg_id; ++ reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; ++ reg.write_mask = VKD3DSP_WRITEMASK_ALL; ++ reg.reg.idx[0].offset = reg_id; + + write_sm1_dst_register(buffer, ®); + } +@@ -2163,61 +1818,6 @@ static void d3dbc_write_vsir_dcl(struct d3dbc_compiler *d3dbc, const struct vkd3 + } + } + +-static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir_instruction( +- struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) +-{ +- const struct vkd3d_sm1_opcode_info *info; +- +- if (!(info = shader_sm1_get_opcode_info_from_vsir(d3dbc, ins->opcode))) +- { +- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE, +- "Opcode %#x not supported for shader profile.", ins->opcode); +- d3dbc->failed = true; +- return NULL; +- } +- +- if (ins->dst_count != info->dst_count) +- { +- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, +- "Invalid destination count %u for vsir instruction %#x (expected %u).", +- ins->dst_count, ins->opcode, info->dst_count); +- d3dbc->failed = true; +- return NULL; +- } +- if (ins->src_count != info->src_count) +- { +- vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_COUNT, +- "Invalid source count %u for vsir instruction %#x (expected %u).", +- ins->src_count, ins->opcode, info->src_count); +- d3dbc->failed = true; +- return NULL; +- } +- +- return info; +-} +- +-static void d3dbc_write_vsir_simple_instruction(struct d3dbc_compiler *d3dbc, +- const struct vkd3d_shader_instruction *ins) +-{ +- struct sm1_instruction instr = {0}; +- const struct vkd3d_sm1_opcode_info *info; +- +- if (!(info = shader_sm1_get_opcode_info_from_vsir_instruction(d3dbc, ins))) +- return; +- +- 
instr.opcode = info->sm1_opcode; +- instr.flags = ins->flags; +- instr.has_dst = info->dst_count; +- instr.src_count = info->src_count; +- +- if (instr.has_dst) +- sm1_dst_reg_from_vsir(d3dbc, &ins->dst[0], &instr.dst, &ins->location); +- for (unsigned int i = 0; i < instr.src_count; ++i) +- sm1_src_reg_from_vsir(d3dbc, &ins->src[i], &instr.srcs[i], &ins->location); +- +- d3dbc_write_instruction(d3dbc, &instr); +-} +- + static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) + { + uint32_t writemask; +@@ -2254,7 +1854,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str + case VKD3DSIH_TEX: + case VKD3DSIH_TEXKILL: + case VKD3DSIH_TEXLDD: +- d3dbc_write_vsir_simple_instruction(d3dbc, ins); ++ d3dbc_write_instruction(d3dbc, ins); + break; + + case VKD3DSIH_EXP: +@@ -2271,7 +1871,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str + writemask, ins->opcode); + d3dbc->failed = true; + } +- d3dbc_write_vsir_simple_instruction(d3dbc, ins); ++ d3dbc_write_instruction(d3dbc, ins); + break; + + default: +@@ -2287,13 +1887,13 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; +- struct sm1_dst_register reg = {0}; ++ struct vkd3d_shader_dst_param reg = {0}; + enum vkd3d_decl_usage usage; + uint32_t token, usage_idx; + bool ret; + + if (sm1_register_from_semantic_name(version, element->semantic_name, +- element->semantic_index, output, &reg.type, &reg.reg)) ++ element->semantic_index, output, &reg.reg.type, &reg.reg.idx[0].offset)) + { + usage = 0; + usage_idx = 0; +@@ -2302,8 +1902,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + { + ret = sm1_usage_from_semantic_name(element->semantic_name, element->semantic_index, &usage, &usage_idx); + VKD3D_ASSERT(ret); +- reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; +- reg.reg = element->register_index; ++ reg.reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ reg.reg.idx[0].offset = element->register_index; + } + + token = VKD3D_SM1_OP_DCL; +@@ -2316,7 +1916,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT; + put_u32(buffer, token); + +- reg.writemask = element->mask; ++ reg.write_mask = element->mask; + write_sm1_dst_register(buffer, &reg); + } + +@@ -2384,9 +1984,7 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + } + + put_u32(buffer, sm1_version(version->type, version->major, version->minor)); +- +- bytecode_put_bytes(buffer, ctab->code, ctab->size); +- ++ d3dbc_write_comment(&d3dbc, VKD3D_MAKE_TAG('C','T','A','B'), ctab); + d3dbc_write_semantic_dcls(&d3dbc); + d3dbc_write_program_instructions(&d3dbc); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index f6ac8e0829e..81af62f7810 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -115,6 +115,14 @@ static uint32_t read_u32(const char **ptr) + return ret; + } + ++static uint64_t read_u64(const char **ptr) ++{ ++ uint64_t ret; ++ memcpy(&ret, *ptr, sizeof(ret)); ++ *ptr += sizeof(ret); ++ return ret; ++} ++ + static float read_float(const char **ptr) + { + union +@@ -502,6 +510,28 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + return ret; + } + ++static int shdr_parse_features(const struct vkd3d_shader_dxbc_section_desc *section, ++ struct vkd3d_shader_message_context *message_context, struct vsir_features *f) ++{ ++ const char *data = section->data.code; ++ const char *ptr = data; ++ uint64_t flags; ++ ++ if (!require_space(0, 1, sizeof(uint64_t), section->data.size)) ++ { ++ WARN("Invalid data size %#zx.\n", section->data.size); ++ vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, 
++ "SFI0 section size %zu is too small to contain flags.\n", section->data.size); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ flags = read_u64(&ptr); ++ ++ if (flags & DXBC_SFI0_REQUIRES_ROVS) ++ f->rovs = true; ++ ++ return VKD3D_OK; ++} ++ + static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + struct vkd3d_shader_message_context *message_context, void *context) + { +@@ -558,6 +588,11 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, + desc->byte_code_size = section->data.size; + break; + ++ case TAG_SFI0: ++ if ((ret = shdr_parse_features(section, message_context, &desc->features)) < 0) ++ return ret; ++ break; ++ + case TAG_AON9: + TRACE("Skipping AON9 shader code chunk.\n"); + break; +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 7099bcc9ce2..4493602dfb7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -3824,7 +3824,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par + } + + static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( +- enum vkd3d_shader_sysval_semantic sysval_semantic) ++ enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input) + { + switch (sysval_semantic) + { +@@ -3834,7 +3834,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( + case VKD3D_SHADER_SV_SAMPLE_INDEX: + return VKD3DSPR_NULL; + case VKD3D_SHADER_SV_COVERAGE: +- return VKD3DSPR_COVERAGE; ++ return is_input ? 
VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK; + case VKD3D_SHADER_SV_DEPTH: + return VKD3DSPR_DEPTHOUT; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: +@@ -3884,7 +3884,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + param = &params[i]; + + if (e->register_index == UINT_MAX +- && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) ++ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL) + { + dst_param_io_init(param, e, io_reg_type); + continue; +@@ -9348,7 +9348,7 @@ static void signature_element_read_additional_element_values(struct signature_el + } + + static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, +- struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) ++ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input) + { + unsigned int i, j, column_count, operand_count, index; + const struct sm6_metadata_node *node, *element_node; +@@ -9466,7 +9466,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + + if ((is_register = e->register_index == UINT_MAX)) + { +- if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) ++ if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID) + { + WARN("Unhandled I/O register semantic kind %u.\n", j); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, +@@ -9578,17 +9578,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + } + + if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], +- &program->input_signature, tessellator_domain)) < 0) ++ &program->input_signature, tessellator_domain, true)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, 
m->u.node->operands[1], +- &program->output_signature, tessellator_domain)) < 0) ++ &program->output_signature, tessellator_domain, false)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], +- &program->patch_constant_signature, tessellator_domain)) < 0) ++ &program->patch_constant_signature, tessellator_domain, false)) < 0) + { + return ret; + } +@@ -9717,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); + ins->declaration.tessellator_domain = tessellator_domain; ++ sm6->p.program->tess_domain = tessellator_domain; + } + +-static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, +- const char *type) ++static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, ++ unsigned int count, bool allow_zero, const char *type) + { +- if (!count || count > 32) ++ if ((!count && !allow_zero) || count > 32) + { + WARN("%s control point count %u invalid.\n", type, count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +@@ -9744,6 +9745,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); + ins->declaration.tessellator_partitioning = tessellator_partitioning; ++ ++ sm6->p.program->tess_partitioning = tessellator_partitioning; + } + + static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, +@@ -9760,6 +9763,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); + ins->declaration.tessellator_output_primitive = primitive; ++ ++ sm6->p.program->tess_output_primitive = primitive; + } + + static void sm6_parser_emit_dcl_max_tessellation_factor(struct 
sm6_parser *sm6, struct sm6_metadata_value *m) +@@ -9951,7 +9956,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa + } + + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); +- sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); ++ sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); + sm6->p.program->input_control_point_count = operands[1]; + + return operands[0]; +@@ -10010,9 +10015,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa + } + } + +- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); ++ sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); + program->input_control_point_count = operands[1]; +- sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); ++ sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); + program->output_control_point_count = operands[2]; + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); +@@ -10351,7 +10356,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ + count = max(token_count, 400) - 400; + if (!vsir_program_init(program, compile_info, &version, +- (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) ++ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); + sm6->ptr = &sm6->start[1]; +@@ -10378,6 +10383,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + *input_signature = dxbc_desc->input_signature; + *output_signature = dxbc_desc->output_signature; + *patch_constant_signature = dxbc_desc->patch_constant_signature; ++ program->features = dxbc_desc->features; + memset(dxbc_desc, 0, sizeof(*dxbc_desc)); + + block = &sm6->root_block; +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index e22177e1e30..3795add87c7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -36,6 +36,16 @@ struct fx_4_binary_type + uint32_t typeinfo; + }; + ++struct fx_5_shader ++{ ++ uint32_t offset; ++ uint32_t sodecl[4]; ++ uint32_t sodecl_count; ++ uint32_t rast_stream; ++ uint32_t iface_bindings_count; ++ uint32_t iface_bindings; ++}; ++ + struct string_entry + { + struct rb_entry entry; +@@ -550,6 +560,8 @@ enum fx_4_type_constants + FX_4_ASSIGNMENT_VARIABLE = 0x2, + FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, + FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, ++ FX_4_ASSIGNMENT_INLINE_SHADER = 0x7, ++ FX_5_ASSIGNMENT_INLINE_SHADER = 0x8, + }; + + static const uint32_t fx_4_numeric_base_types[] = +@@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + vkd3d_unreachable(); + + case HLSL_CLASS_VOID: +@@ -1008,8 +1021,8 @@ static uint32_t get_fx_2_type_class(const struct hlsl_type *type) + return 
hlsl_sm1_class(type); + } + +-static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, +- struct fx_write_context *fx) ++static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, ++ const struct hlsl_semantic *semantic, bool is_combined_sampler, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t semantic_offset, offset, elements_count = 0, name_offset; +@@ -1025,7 +1038,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + name_offset = write_string(name, fx); + semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; + +- offset = put_u32(buffer, hlsl_sm1_base_type(type)); ++ offset = put_u32(buffer, hlsl_sm1_base_type(type, is_combined_sampler)); + put_u32(buffer, get_fx_2_type_class(type)); + put_u32(buffer, name_offset); + put_u32(buffer, semantic_offset); +@@ -1061,7 +1074,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + + /* Validated in check_invalid_object_fields(). */ + VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); +- write_fx_2_parameter(field->type, field->name, &field->semantic, fx); ++ write_fx_2_parameter(field->type, field->name, &field->semantic, false, fx); + } + } + +@@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + /* This cannot appear as an extern variable. 
*/ + break; + } +@@ -1321,7 +1335,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) + if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) + continue; + +- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); ++ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); + value_offset = write_fx_2_initial_value(var, fx); + + flags = 0; +@@ -1344,7 +1358,7 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t desc_offset, value_offset; + +- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); ++ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); + value_offset = write_fx_2_initial_value(var, fx); + + put_u32(buffer, desc_offset); +@@ -1834,6 +1848,7 @@ enum state_property_component_type + FX_BLEND, + FX_VERTEXSHADER, + FX_PIXELSHADER, ++ FX_GEOMETRYSHADER, + FX_COMPONENT_TYPE_COUNT, + }; + +@@ -2065,6 +2080,7 @@ fx_4_states[] = + + { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, + { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, ++ { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 }, + { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, + { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, + { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, +@@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en + + static int fx_2_parse(struct fx_parser *parser) + { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not 
implemented."); + + return -1; + } +@@ -3120,7 +3136,7 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) + else + { + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, +- "Only numeric and string types are supported in annotations.\n"); ++ "Only numeric and string types are supported in annotations."); + } + + if (type.element_count) +@@ -3210,27 +3226,13 @@ static void fx_parse_buffers(struct fx_parser *parser) + } + } + +-static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) ++static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) + { + struct vkd3d_shader_compile_info info = { 0 }; + struct vkd3d_shader_code output; +- uint32_t data_size, offset; + const void *data = NULL; + const char *p, *q, *end; +- struct fx_5_shader +- { +- uint32_t offset; +- uint32_t sodecl[4]; +- uint32_t sodecl_count; +- uint32_t rast_stream; +- uint32_t iface_bindings_count; +- uint32_t iface_bindings; +- } shader5; +- struct fx_4_gs_so +- { +- uint32_t offset; +- uint32_t sodecl; +- } gs_so; ++ uint32_t data_size; + int ret; + + static const struct vkd3d_shader_compile_option options[] = +@@ -3238,35 +3240,9 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + }; + +- switch (object_type) +- { +- case FX_4_OBJECT_TYPE_PIXEL_SHADER: +- case FX_4_OBJECT_TYPE_VERTEX_SHADER: +- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: +- offset = fx_parser_read_u32(parser); +- break; +- +- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: +- fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); +- offset = gs_so.offset; +- break; +- +- case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: +- case FX_5_OBJECT_TYPE_COMPUTE_SHADER: +- case FX_5_OBJECT_TYPE_HULL_SHADER: +- case FX_5_OBJECT_TYPE_DOMAIN_SHADER: +- fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); +- offset = shader5.offset; +- 
break; +- +- default: +- parser->failed = true; +- return; +- } +- +- fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); ++ fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); + if (data_size) +- data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); ++ data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size); + + if (!data) + return; +@@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) + { + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, +- "Failed to disassemble shader blob.\n"); ++ "Failed to disassemble shader blob."); + return; + } + parse_fx_print_indent(parser); +@@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + + parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "}"); +- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) ++ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) + { + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", +- fx_4_get_string(parser, gs_so.sodecl)); ++ fx_4_get_string(parser, shader->sodecl[0])); + } + else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) + { +- for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) ++ for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i) + { +- if (shader5.sodecl[i]) ++ if (shader->sodecl[i]) + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", +- i, fx_4_get_string(parser, shader5.sodecl[i])); ++ i, fx_4_get_string(parser, shader->sodecl[i])); + } +- if (shader5.sodecl_count) +- vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); ++ if (shader->sodecl_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "\n/* 
Rasterized stream %u */", shader->rast_stream); + } + + vkd3d_shader_free_shader_code(&output); + } + ++static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) ++{ ++ struct fx_5_shader shader = { 0 }; ++ ++ switch (object_type) ++ { ++ case FX_4_OBJECT_TYPE_PIXEL_SHADER: ++ case FX_4_OBJECT_TYPE_VERTEX_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: ++ shader.offset = fx_parser_read_u32(parser); ++ break; ++ ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: ++ shader.offset = fx_parser_read_u32(parser); ++ shader.sodecl[0] = fx_parser_read_u32(parser); ++ break; ++ ++ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: ++ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: ++ case FX_5_OBJECT_TYPE_HULL_SHADER: ++ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: ++ fx_parser_read_u32s(parser, &shader, sizeof(shader)); ++ break; ++ ++ default: ++ parser->failed = true; ++ return; ++ } ++ ++ fx_4_parse_shader_blob(parser, object_type, &shader); ++} ++ + static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) + { + switch (type->typeinfo) +@@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + [FX_UINT8] = "byte", + }; + const struct rhs_named_value *named_value; ++ struct fx_5_shader shader = { 0 }; ++ unsigned int shader_type = 0; + uint32_t i, j, comp_count; + struct fx_4_state *state; + +@@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), + sizeof(*fx_4_states), fx_4_state_id_compare))) + { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id); + break; + } + +@@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", 
fx_4_get_string(parser, index.name), + fx_4_get_string(parser, index.index)); + break; ++ case FX_4_ASSIGNMENT_INLINE_SHADER: ++ case FX_5_ASSIGNMENT_INLINE_SHADER: ++ { ++ bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER; ++ ++ if (shader5) ++ fx_parser_read_unstructured(parser, &shader, entry.value, sizeof(shader)); ++ else ++ fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t)); ++ ++ if (state->type == FX_PIXELSHADER) ++ shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER; ++ else if (state->type == FX_VERTEXSHADER) ++ shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER; ++ else if (state->type == FX_GEOMETRYSHADER) ++ shader_type = shader5 ? FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO; ++ else if (state->type == FX_HULLSHADER) ++ shader_type = FX_5_OBJECT_TYPE_HULL_SHADER; ++ else if (state->type == FX_DOMAINSHADER) ++ shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER; ++ else if (state->type == FX_COMPUTESHADER) ++ shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER; ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_start_indent(parser); ++ fx_4_parse_shader_blob(parser, shader_type, &shader); ++ parse_fx_end_indent(parser); ++ break; ++ } + default: + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, +- "Unsupported assignment type %u.\n", entry.type); ++ "Unsupported assignment type %u.", entry.type); + } + vkd3d_string_buffer_printf(&parser->buffer, ";\n"); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 0df0e30f399..ab6604bd703 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -1507,13 +1507,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + case VKD3DSIH_DCL_INDEXABLE_TEMP: + shader_glsl_dcl_indexable_temp(gen, ins); + break; +- case VKD3DSIH_DCL_INPUT: +- case VKD3DSIH_DCL_INPUT_PS: +- case VKD3DSIH_DCL_INPUT_PS_SGV: +- case VKD3DSIH_DCL_INPUT_PS_SIV: +- 
case VKD3DSIH_DCL_INPUT_SGV: +- case VKD3DSIH_DCL_OUTPUT: +- case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_NOP: + break; + case VKD3DSIH_DEFAULT: +@@ -2476,7 +2469,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + +- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + + vkd3d_glsl_generator_init(&generator, program, compile_info, + descriptor_info, combined_sampler_info, message_context); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 96de18dc886..84da2fcbc9f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -287,6 +287,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) + case HLSL_CLASS_UAV: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_VOID: + case HLSL_CLASS_NULL: + return false; +@@ -434,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -525,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_STREAM_OUTPUT: + break; + } + vkd3d_unreachable(); +@@ -680,6 +683,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + vkd3d_unreachable(); + } + type = next_type; +@@ -898,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba + return type; + } + 
++struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, ++ enum hlsl_so_object_type so_type, struct hlsl_type *data_type) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) ++ return NULL; ++ type->class = HLSL_CLASS_STREAM_OUTPUT; ++ type->e.so.so_type = so_type; ++ type->e.so.type = data_type; ++ ++ list_add_tail(&ctx->types, &type->entry); ++ ++ return type; ++} ++ + struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + struct hlsl_struct_field *fields, size_t field_count) + { +@@ -1086,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_STREAM_OUTPUT: + break; + } + +@@ -1157,6 +1178,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + ++ case HLSL_CLASS_STREAM_OUTPUT: ++ if (t1->e.so.so_type != t2->e.so.so_type) ++ return false; ++ return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); ++ + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: +@@ -1695,22 +1721,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * + return &s->node; + } + +-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, +- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_vsir_instruction_ref *vsir_instr; +- +- if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) +- return NULL; +- init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); +- vsir_instr->vsir_instr_idx = vsir_instr_idx; +- +- if (reg) +- vsir_instr->node.reg = *reg; +- +- return &vsir_instr->node; +-} +- + struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx 
*ctx, const struct hlsl_deref *deref, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) + { +@@ -1844,22 +1854,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct + return &store->node; + } + +-struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, ++struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count, + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_swizzle *swizzle; + struct hlsl_type *type; + ++ VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR); ++ + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) + return NULL; +- VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); +- if (components == 1) ++ if (component_count > 1) ++ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); ++ else + type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); ++ init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); ++ hlsl_src_from_node(&swizzle->val, val); ++ swizzle->u.vector = s; ++ ++ return &swizzle->node; ++} ++ ++struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, ++ unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_swizzle *swizzle; ++ struct hlsl_type *type; ++ ++ VKD3D_ASSERT(val->data_type->class == HLSL_CLASS_MATRIX); ++ ++ if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) ++ return NULL; ++ if (component_count > 1) ++ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); + else +- type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); ++ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); + init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); + hlsl_src_from_node(&swizzle->val, val); +- swizzle->swizzle = s; ++ swizzle->u.matrix = s; ++ + return 
&swizzle->node; + } + +@@ -2054,8 +2087,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type + return &jump->node; + } + +-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, ++struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, ++ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_loop *loop; +@@ -2066,6 +2099,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); + ++ hlsl_block_init(&loop->iter); ++ if (iter) ++ hlsl_block_add_block(&loop->iter, iter); ++ + loop->unroll_type = unroll_type; + loop->unroll_limit = unroll_limit; + return &loop->node; +@@ -2221,14 +2258,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ + + static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) + { ++ struct hlsl_block iter, body; + struct hlsl_ir_node *dst; +- struct hlsl_block body; ++ ++ if (!clone_block(ctx, &iter, &src->iter, map)) ++ return NULL; + + if (!clone_block(ctx, &body, &src->body, map)) ++ { ++ hlsl_block_cleanup(&iter); + return NULL; ++ } + +- if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) ++ if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + { ++ hlsl_block_cleanup(&iter); + hlsl_block_cleanup(&body); + return NULL; + } +@@ -2310,8 +2354,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr + static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, + struct clone_instr_map *map, struct hlsl_ir_swizzle *src) + { +- return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, +- map_instr(map, 
src->val.node), &src->node.loc); ++ if (src->val.node->data_type->class == HLSL_CLASS_MATRIX) ++ return hlsl_new_matrix_swizzle(ctx, src->u.matrix, src->node.data_type->dimx, ++ map_instr(map, src->val.node), &src->node.loc); ++ else ++ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->dimx, ++ map_instr(map, src->val.node), &src->node.loc); + } + + static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, +@@ -2533,9 +2581,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + + case HLSL_IR_STATEBLOCK_CONSTANT: + return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_unreachable(); + } + + vkd3d_unreachable(); +@@ -2693,10 +2738,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha + return NULL; + } + +-struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) ++static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl_type *type) + { +- struct vkd3d_string_buffer *string, *inner_string; +- + static const char *const base_types[] = + { + [HLSL_TYPE_FLOAT] = "float", +@@ -2720,31 +2763,28 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", + }; + +- if (!(string = hlsl_get_string_buffer(ctx))) +- return NULL; +- + if (type->name) + { +- vkd3d_string_buffer_printf(string, "%s", type->name); +- return string; ++ vkd3d_string_buffer_printf(buffer, "%s", type->name); ++ return; + } + + switch (type->class) + { + case HLSL_CLASS_SCALAR: + VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); +- return string; ++ vkd3d_string_buffer_printf(buffer, "%s", base_types[type->e.numeric.type]); ++ return; + + case HLSL_CLASS_VECTOR: + VKD3D_ASSERT(type->e.numeric.type < 
ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); +- return string; ++ vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->dimx); ++ return; + + case HLSL_CLASS_MATRIX: + VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); +- vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); +- return string; ++ vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); ++ return; + + case HLSL_CLASS_ARRAY: + { +@@ -2753,88 +2793,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + ; + +- if ((inner_string = hlsl_type_to_string(ctx, t))) +- { +- vkd3d_string_buffer_printf(string, "%s", inner_string->buffer); +- hlsl_release_string_buffer(ctx, inner_string); +- } +- ++ hlsl_dump_type(buffer, t); + for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) + { + if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) +- vkd3d_string_buffer_printf(string, "[]"); ++ vkd3d_string_buffer_printf(buffer, "[]"); + else +- vkd3d_string_buffer_printf(string, "[%u]", t->e.array.elements_count); ++ vkd3d_string_buffer_printf(buffer, "[%u]", t->e.array.elements_count); + } +- return string; ++ return; + } + + case HLSL_CLASS_STRUCT: +- vkd3d_string_buffer_printf(string, ""); +- return string; ++ vkd3d_string_buffer_printf(buffer, ""); ++ return; + + case HLSL_CLASS_TEXTURE: + if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { +- vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); +- return string; ++ vkd3d_string_buffer_printf(buffer, "ByteAddressBuffer"); ++ return; + } + + if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { +- vkd3d_string_buffer_printf(string, "Texture"); +- return string; ++ vkd3d_string_buffer_printf(buffer, "Texture"); ++ return; + 
} + + VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); + VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + { +- vkd3d_string_buffer_printf(string, "Buffer"); ++ vkd3d_string_buffer_printf(buffer, "Buffer<"); + } + else + { + VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); +- vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); ++ vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]); + } +- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) +- { +- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); +- hlsl_release_string_buffer(ctx, inner_string); +- } +- return string; ++ hlsl_dump_type(buffer, type->e.resource.format); ++ vkd3d_string_buffer_printf(buffer, ">"); ++ return; + + case HLSL_CLASS_UAV: + if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) + { +- vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); +- return string; ++ vkd3d_string_buffer_printf(buffer, "RWByteAddressBuffer"); ++ return; + } + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) +- vkd3d_string_buffer_printf(string, "RWBuffer"); ++ vkd3d_string_buffer_printf(buffer, "RWBuffer<"); + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); ++ vkd3d_string_buffer_printf(buffer, "RWStructuredBuffer<"); + else +- vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); +- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) +- { +- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); +- hlsl_release_string_buffer(ctx, inner_string); +- } +- return string; ++ vkd3d_string_buffer_printf(buffer, "RWTexture%s<", dimensions[type->sampler_dim]); ++ hlsl_dump_type(buffer, type->e.resource.format); ++ vkd3d_string_buffer_printf(buffer, ">"); ++ return; + + case 
HLSL_CLASS_CONSTANT_BUFFER: +- vkd3d_string_buffer_printf(string, "ConstantBuffer"); +- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) +- { +- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); +- hlsl_release_string_buffer(ctx, inner_string); +- } +- return string; ++ vkd3d_string_buffer_printf(buffer, "ConstantBuffer<"); ++ hlsl_dump_type(buffer, type->e.resource.format); ++ vkd3d_string_buffer_printf(buffer, ">"); ++ return; + + case HLSL_CLASS_ERROR: +- vkd3d_string_buffer_printf(string, ""); +- return string; ++ vkd3d_string_buffer_printf(buffer, ""); ++ return; ++ ++ case HLSL_CLASS_STREAM_OUTPUT: ++ if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) ++ vkd3d_string_buffer_printf(buffer, "PointStream<"); ++ else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) ++ vkd3d_string_buffer_printf(buffer, "LineStream<"); ++ else ++ vkd3d_string_buffer_printf(buffer, "TriangleStream<"); ++ hlsl_dump_type(buffer, type->e.so.type); ++ vkd3d_string_buffer_printf(buffer, ">"); ++ return; + + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: +@@ -2857,8 +2894,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + break; + } + +- vkd3d_string_buffer_printf(string, ""); +- return string; ++ vkd3d_string_buffer_printf(buffer, ""); ++} ++ ++struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) ++{ ++ struct vkd3d_string_buffer *buffer; ++ ++ if (!(buffer = hlsl_get_string_buffer(ctx))) ++ return NULL; ++ hlsl_dump_type(buffer, type); ++ return buffer; + } + + struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, +@@ -2968,7 +3014,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", + [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", + [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", +- 
[HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", + }; + + if (type >= ARRAY_SIZE(names)) +@@ -3022,7 +3067,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer + vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); + hlsl_release_string_buffer(ctx, string); + } +- vkd3d_string_buffer_printf(buffer, "%s %s", debug_hlsl_type(ctx, var->data_type), var->name); ++ hlsl_dump_type(buffer, var->data_type); ++ vkd3d_string_buffer_printf(buffer, " %s", var->name); + if (var->semantic.name) + vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); + } +@@ -3103,34 +3149,28 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) + return vkd3d_dbg_sprintf(".%s", string); + } + +-static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) ++void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, ++ struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f) + { +- const struct hlsl_ir_function_decl *decl = call->decl; +- struct vkd3d_string_buffer *string; + size_t i; + +- if (!(string = hlsl_type_to_string(ctx, decl->return_type))) +- return; +- +- vkd3d_string_buffer_printf(buffer, "call %s %s(", string->buffer, decl->func->name); +- hlsl_release_string_buffer(ctx, string); +- +- for (i = 0; i < decl->parameters.count; ++i) ++ hlsl_dump_type(buffer, f->return_type); ++ vkd3d_string_buffer_printf(buffer, " %s(", f->func->name); ++ for (i = 0; i < f->parameters.count; ++i) + { +- const struct hlsl_ir_var *param = decl->parameters.vars[i]; +- +- if (!(string = hlsl_type_to_string(ctx, param->data_type))) +- return; +- + if (i) + vkd3d_string_buffer_printf(buffer, ", "); +- vkd3d_string_buffer_printf(buffer, "%s", string->buffer); +- +- hlsl_release_string_buffer(ctx, string); ++ dump_ir_var(ctx, buffer, f->parameters.vars[i]); + } + vkd3d_string_buffer_printf(buffer, ")"); + } + ++static void 
dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) ++{ ++ vkd3d_string_buffer_printf(buffer, "call "); ++ hlsl_dump_ir_function_decl(ctx, buffer, call->decl); ++} ++ + static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant) + { + struct hlsl_type *type = constant->node.data_type; +@@ -3201,13 +3241,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_LOG2] = "log2", + [HLSL_OP1_LOGIC_NOT] = "!", + [HLSL_OP1_NEG] = "-", +- [HLSL_OP1_NRM] = "nrm", + [HLSL_OP1_RCP] = "rcp", + [HLSL_OP1_REINTERPRET] = "reinterpret", + [HLSL_OP1_ROUND] = "round", + [HLSL_OP1_RSQ] = "rsq", + [HLSL_OP1_SAT] = "sat", +- [HLSL_OP1_SIGN] = "sign", + [HLSL_OP1_SIN] = "sin", + [HLSL_OP1_SIN_REDUCED] = "sin_reduced", + [HLSL_OP1_SQRT] = "sqrt", +@@ -3217,7 +3255,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP2_BIT_AND] = "&", + [HLSL_OP2_BIT_OR] = "|", + [HLSL_OP2_BIT_XOR] = "^", +- [HLSL_OP2_CRS] = "crs", + [HLSL_OP2_DIV] = "/", + [HLSL_OP2_DOT] = "dot", + [HLSL_OP2_EQUAL] = "==", +@@ -3402,11 +3439,12 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls + { + vkd3d_string_buffer_printf(buffer, "."); + for (i = 0; i < swizzle->node.data_type->dimx; ++i) +- vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); ++ vkd3d_string_buffer_printf(buffer, "_m%u%u", ++ swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x); + } + else + { +- vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx)); ++ vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->dimx)); + } + } + +@@ -3562,11 +3600,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + case HLSL_IR_STATEBLOCK_CONSTANT: + 
dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); + break; +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", +- hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); +- break; + } + } + +@@ -3719,6 +3752,7 @@ static void free_ir_load(struct hlsl_ir_load *load) + static void free_ir_loop(struct hlsl_ir_loop *loop) + { + hlsl_block_cleanup(&loop->body); ++ hlsl_block_cleanup(&loop->iter); + vkd3d_free(loop); + } + +@@ -3875,10 +3909,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + case HLSL_IR_STATEBLOCK_CONSTANT: + free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); + break; +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); +- break; + } + } + +@@ -3977,8 +4007,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function + + uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) + { ++ unsigned int src_component = 0; + uint32_t ret = 0; +- unsigned int i; + + /* Leave replicate swizzles alone; some instructions need them. 
*/ + if (swizzle == HLSL_SWIZZLE(X, X, X, X) +@@ -3987,13 +4017,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) + || swizzle == HLSL_SWIZZLE(W, W, W, W)) + return swizzle; + +- for (i = 0; i < 4; ++i) ++ for (unsigned int dst_component = 0; dst_component < 4; ++dst_component) + { +- if (writemask & (1 << i)) +- { +- ret |= (swizzle & 3) << (i * 2); +- swizzle >>= 2; +- } ++ if (writemask & (1 << dst_component)) ++ hlsl_swizzle_set_component(&ret, dst_component, hlsl_swizzle_get_component(swizzle, src_component++)); + } + return ret; + } +@@ -4046,7 +4073,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim + for (i = 0; i < dim; ++i) + { + unsigned int s = hlsl_swizzle_get_component(second, i); +- ret |= hlsl_swizzle_get_component(first, s) << HLSL_SWIZZLE_SHIFT(i); ++ hlsl_swizzle_set_component(&ret, i, hlsl_swizzle_get_component(first, s)); + } + return ret; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 075c76cb0e2..7c9547a1c01 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -22,7 +22,6 @@ + + #include "vkd3d_shader_private.h" + #include "wine/rbtree.h" +-#include "d3dcommon.h" + #include "d3dx9shader.h" + + /* The general IR structure is inspired by Mesa GLSL hir, even though the code +@@ -51,31 +50,17 @@ + * DEALINGS IN THE SOFTWARE. 
+ */ + +-#define HLSL_SWIZZLE_X (0u) +-#define HLSL_SWIZZLE_Y (1u) +-#define HLSL_SWIZZLE_Z (2u) +-#define HLSL_SWIZZLE_W (3u) +- +-#define HLSL_SWIZZLE(x, y, z, w) \ +- (((HLSL_SWIZZLE_ ## x) << 0) \ +- | ((HLSL_SWIZZLE_ ## y) << 2) \ +- | ((HLSL_SWIZZLE_ ## z) << 4) \ +- | ((HLSL_SWIZZLE_ ## w) << 6)) +- +-#define HLSL_SWIZZLE_MASK (0x3u) +-#define HLSL_SWIZZLE_SHIFT(idx) (2u * (idx)) ++#define HLSL_SWIZZLE VKD3D_SHADER_SWIZZLE + + static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned int idx) + { +- return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; ++ return vsir_swizzle_get_component(swizzle, idx); + } + +-static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) ++static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) + { +- return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), +- hlsl_swizzle_get_component(swizzle, 1), +- hlsl_swizzle_get_component(swizzle, 2), +- hlsl_swizzle_get_component(swizzle, 3)); ++ *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); ++ *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); + } + + enum hlsl_type_class +@@ -105,6 +90,7 @@ enum hlsl_type_class + HLSL_CLASS_GEOMETRY_SHADER, + HLSL_CLASS_CONSTANT_BUFFER, + HLSL_CLASS_BLEND_STATE, ++ HLSL_CLASS_STREAM_OUTPUT, + HLSL_CLASS_VOID, + HLSL_CLASS_NULL, + HLSL_CLASS_ERROR, +@@ -142,6 +128,13 @@ enum hlsl_sampler_dim + /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ + }; + ++enum hlsl_so_object_type ++{ ++ HLSL_STREAM_OUTPUT_POINT_STREAM, ++ HLSL_STREAM_OUTPUT_LINE_STREAM, ++ HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, ++}; ++ + enum hlsl_regset + { + HLSL_REGSET_SAMPLERS, +@@ -220,6 +213,12 @@ struct hlsl_type + } resource; + /* Additional field to distinguish object types. Currently used only for technique types. */ + unsigned int version; ++ /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. 
*/ ++ struct ++ { ++ struct hlsl_type *type; ++ enum hlsl_so_object_type so_type; ++ } so; + } e; + + /* Number of numeric register components used by one value of this type, for each regset. +@@ -330,8 +329,6 @@ enum hlsl_ir_node_type + HLSL_IR_COMPILE, + HLSL_IR_SAMPLER_STATE, + HLSL_IR_STATEBLOCK_CONSTANT, +- +- HLSL_IR_VSIR_INSTRUCTION_REF, + }; + + /* Common data for every type of IR instruction node. */ +@@ -524,6 +521,10 @@ struct hlsl_ir_var + * element of a struct, and thus needs to be aligned when packed in the signature. */ + bool force_align; + ++ /* Whether this is a sampler that was created from the combination of a ++ * sampler and a texture for SM<4 backwards compatibility. */ ++ bool is_combined_sampler; ++ + uint32_t is_input_semantic : 1; + uint32_t is_output_semantic : 1; + uint32_t is_uniform : 1; +@@ -644,21 +645,30 @@ struct hlsl_ir_if + struct hlsl_block else_block; + }; + +-enum hlsl_ir_loop_unroll_type ++enum hlsl_loop_unroll_type ++{ ++ HLSL_LOOP_UNROLL, ++ HLSL_LOOP_FORCE_UNROLL, ++ HLSL_LOOP_FORCE_LOOP ++}; ++ ++enum hlsl_loop_type + { +- HLSL_IR_LOOP_UNROLL, +- HLSL_IR_LOOP_FORCE_UNROLL, +- HLSL_IR_LOOP_FORCE_LOOP ++ HLSL_LOOP_FOR, ++ HLSL_LOOP_WHILE, ++ HLSL_LOOP_DO_WHILE + }; + + struct hlsl_ir_loop + { + struct hlsl_ir_node node; ++ struct hlsl_block iter; + /* loop condition is stored in the body (as "if (!condition) break;") */ + struct hlsl_block body; ++ enum hlsl_loop_type type; + unsigned int next_index; /* liveness index of the end of the loop */ + unsigned int unroll_limit; +- enum hlsl_ir_loop_unroll_type unroll_type; ++ enum hlsl_loop_unroll_type unroll_type; + }; + + struct hlsl_ir_switch_case +@@ -703,13 +713,11 @@ enum hlsl_ir_expr_op + HLSL_OP1_LOG2, + HLSL_OP1_LOGIC_NOT, + HLSL_OP1_NEG, +- HLSL_OP1_NRM, + HLSL_OP1_RCP, + HLSL_OP1_REINTERPRET, + HLSL_OP1_ROUND, + HLSL_OP1_RSQ, + HLSL_OP1_SAT, +- HLSL_OP1_SIGN, + HLSL_OP1_SIN, + HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ + HLSL_OP1_SQRT, +@@ 
-719,7 +727,6 @@ enum hlsl_ir_expr_op + HLSL_OP2_BIT_AND, + HLSL_OP2_BIT_OR, + HLSL_OP2_BIT_XOR, +- HLSL_OP2_CRS, + HLSL_OP2_DIV, + HLSL_OP2_DOT, + HLSL_OP2_EQUAL, +@@ -781,7 +788,17 @@ struct hlsl_ir_swizzle + { + struct hlsl_ir_node node; + struct hlsl_src val; +- uint32_t swizzle; ++ union ++ { ++ uint32_t vector; ++ struct hlsl_matrix_swizzle ++ { ++ struct ++ { ++ uint8_t x, y; ++ } components[4]; ++ } matrix; ++ } u; + }; + + struct hlsl_ir_index +@@ -934,16 +951,6 @@ struct hlsl_ir_stateblock_constant + char *name; + }; + +-/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. +- * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ +-struct hlsl_ir_vsir_instruction_ref +-{ +- struct hlsl_ir_node node; +- +- /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ +- unsigned int vsir_instr_idx; +-}; +- + struct hlsl_scope + { + /* Item entry for hlsl_ctx.scopes. */ +@@ -1259,12 +1266,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co + return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); + } + +-static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) +-{ +- VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); +- return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); +-} +- + static inline void hlsl_block_init(struct hlsl_block *block) + { + list_init(&block->instrs); +@@ -1442,6 +1443,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); + bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); + + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); ++void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, ++ struct vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f); + void hlsl_dump_var_default_values(const 
struct hlsl_ir_var *var); + + bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, +@@ -1519,6 +1522,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); ++struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, ++ enum hlsl_so_object_type so_type, struct hlsl_type *type); + struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); + +@@ -1550,8 +1555,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty + struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); +-struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, ++ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, ++ unsigned int unroll_limit, const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, ++ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, + const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct 
hlsl_deref *resource, +@@ -1588,9 +1596,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned + struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, + struct list *cases, const struct vkd3d_shader_location *loc); + +-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, +- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); +- + void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); + void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, +@@ -1645,21 +1650,39 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); + bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); ++bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); + bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), + struct hlsl_block *block, void *context); + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); +-D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); ++D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler); + +-void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); +-int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, +- const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, +- struct 
vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); ++struct extern_resource ++{ ++ /* "var" is only not NULL if this resource is a whole variable, so it may ++ * be responsible for more than one component. */ ++ const struct hlsl_ir_var *var; ++ const struct hlsl_buffer *buffer; ++ ++ char *name; ++ bool is_user_packed; ++ ++ /* The data type of a single component of the resource. This might be ++ * different from the data type of the resource itself in 4.0 profiles, ++ * where an array (or multi-dimensional array) is handled as a single ++ * resource, unlike in 5.0. */ ++ struct hlsl_type *component_type; ++ ++ enum hlsl_regset regset; ++ unsigned int id, space, index, bind_count; ++ ++ struct vkd3d_shader_location loc; ++}; + +-int tpf_compile(struct vsir_program *program, uint64_t config_flags, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, +- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); ++struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count); ++void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count); ++void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef); + + enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, + unsigned int storage_modifiers); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index 8dace11916a..31fb30521e9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -104,6 +104,7 @@ if {return KW_IF; } + in {return KW_IN; } + inline {return KW_INLINE; } + inout {return KW_INOUT; } ++LineStream {return KW_LINESTREAM; } + linear {return KW_LINEAR; } + matrix {return KW_MATRIX; } + namespace {return KW_NAMESPACE; } +@@ -114,6 +115,7 @@ out {return KW_OUT; } + packoffset {return KW_PACKOFFSET; } + pass {return KW_PASS; } + PixelShader {return 
KW_PIXELSHADER; } ++PointStream {return KW_POINTSTREAM; } + pixelshader {return KW_PIXELSHADER; } + RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } + RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } +@@ -170,6 +172,7 @@ texture3D {return KW_TEXTURE3D; } + TextureCube {return KW_TEXTURECUBE; } + textureCUBE {return KW_TEXTURECUBE; } + TextureCubeArray {return KW_TEXTURECUBEARRAY; } ++TriangleStream {return KW_TRIANGLESTREAM; } + true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } + unsigned {return KW_UNSIGNED; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 60aade732db..a3814a810b5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -475,7 +475,11 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo + for (i = 0; i < arrays->count; ++i) + { + if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) ++ { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); ++ dst_type = ctx->builtin_types.error; ++ break; ++ } + dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); + } + +@@ -551,13 +555,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co + return true; + } + +-enum loop_type +-{ +- LOOP_FOR, +- LOOP_WHILE, +- LOOP_DO_WHILE +-}; +- + static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) + { + unsigned int i, j; +@@ -573,8 +570,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru + } + } + +-static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, +- struct hlsl_block *cond, struct hlsl_block *iter) ++static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ enum hlsl_loop_type type, struct hlsl_block *cond) + { + struct 
hlsl_ir_node *instr, *next; + +@@ -584,8 +581,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + +- resolve_loop_continue(ctx, &iff->then_block, type, cond, iter); +- resolve_loop_continue(ctx, &iff->else_block, type, cond, iter); ++ resolve_loop_continue(ctx, &iff->then_block, type, cond); ++ resolve_loop_continue(ctx, &iff->else_block, type, cond); + } + else if (instr->type == HLSL_IR_JUMP) + { +@@ -595,7 +592,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block + if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) + continue; + +- if (type == LOOP_DO_WHILE) ++ if (type == HLSL_LOOP_DO_WHILE) + { + if (!hlsl_clone_block(ctx, &cond_block, cond)) + return; +@@ -606,13 +603,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block + } + list_move_before(&instr->entry, &cond_block.instrs); + } +- else if (type == LOOP_FOR) +- { +- if (!hlsl_clone_block(ctx, &cond_block, iter)) +- return; +- list_move_before(&instr->entry, &cond_block.instrs); +- } +- jump->type = HLSL_IR_JUMP_CONTINUE; + } + } + } +@@ -678,8 +668,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + break; +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_unreachable(); + } + } + +@@ -738,11 +726,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str + return res.number.u; + } + +-static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, ++static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) + { +- enum hlsl_ir_loop_unroll_type unroll_type = 
HLSL_IR_LOOP_UNROLL; ++ enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL; + unsigned int i, unroll_limit = 0; + struct hlsl_ir_node *loop; + +@@ -773,11 +761,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + hlsl_block_cleanup(&expr); + } + +- unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; ++ unroll_type = HLSL_LOOP_FORCE_UNROLL; + } + else if (!strcmp(attr->name, "loop")) + { +- unroll_type = HLSL_IR_LOOP_FORCE_LOOP; ++ unroll_type = HLSL_LOOP_FORCE_LOOP; + } + else if (!strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) +@@ -790,7 +778,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + } + } + +- resolve_loop_continue(ctx, body, type, cond, iter); ++ resolve_loop_continue(ctx, body, type, cond); + + if (!init && !(init = make_empty_block(ctx))) + goto oom; +@@ -798,15 +786,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + if (!append_conditional_break(ctx, cond)) + goto oom; + +- if (iter) +- hlsl_block_add_block(body, iter); +- +- if (type == LOOP_DO_WHILE) ++ if (type == HLSL_LOOP_DO_WHILE) + list_move_tail(&body->instrs, &cond->instrs); + else + list_move_head(&body->instrs, &cond->instrs); + +- if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) ++ if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc))) + goto oom; + hlsl_block_add_instr(init, loop); + +@@ -860,6 +845,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod + if (value->data_type->class == HLSL_CLASS_MATRIX) + { + /* Matrix swizzle */ ++ struct hlsl_matrix_swizzle s; + bool m_swizzle; + unsigned int inc, x, y; + +@@ -890,10 +876,11 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod + + if (x >= value->data_type->dimx || y >= value->data_type->dimy) + return NULL; +- swiz |= (y << 4 | x) << component * 8; ++ s.components[component].x = x; ++ 
s.components[component].y = y; + component++; + } +- return hlsl_new_swizzle(ctx, swiz, component, value, loc); ++ return hlsl_new_matrix_swizzle(ctx, s, component, value, loc); + } + + /* Vector swizzle */ +@@ -922,8 +909,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod + + if (s >= value->data_type->dimx) + return NULL; +- swiz |= s << component * 2; +- component++; ++ hlsl_swizzle_set_component(&swiz, component++, s); + } + if (valid) + return hlsl_new_swizzle(ctx, swiz, component, value, loc); +@@ -1192,6 +1178,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays not allowed in struct fields."); ++ field->type = ctx->builtin_types.error; ++ break; + } + + field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); +@@ -1282,6 +1270,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays not allowed in typedefs."); ++ if (!(type = hlsl_type_clone(ctx, ctx->builtin_types.error, 0, 0))) ++ { ++ free_parse_variable_def(v); ++ ret = false; ++ } ++ break; + } + + if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) +@@ -2092,8 +2086,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned + { + if (*writemask & (1 << i)) + { +- unsigned int s = (*swizzle >> (i * 2)) & 3; +- new_swizzle |= s << (bit++ * 2); ++ unsigned int s = hlsl_swizzle_get_component(*swizzle, i); ++ hlsl_swizzle_set_component(&new_swizzle, bit++, s); + if (new_writemask & (1 << s)) + return false; + new_writemask |= 1 << s; +@@ -2107,9 +2101,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned + { + for (j = 0; j < width; ++j) + { +- unsigned int s = (new_swizzle >> (j * 2)) & 3; ++ unsigned int s = 
hlsl_swizzle_get_component(new_swizzle, j); + if (s == i) +- inverted |= j << (bit++ * 2); ++ hlsl_swizzle_set_component(&inverted, bit++, j); + } + } + +@@ -2119,22 +2113,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned + return true; + } + +-static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) ++static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle, ++ uint32_t *ret_inverted, unsigned int *writemask, unsigned int *ret_width) + { +- /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. +- * components are indexed by their sources. i.e. the first component comes from the first +- * component of the rhs. */ +- unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; ++ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0; ++ struct hlsl_matrix_swizzle new_swizzle = {0}; + + /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ + for (i = 0; i < 4; ++i) + { + if (*writemask & (1 << i)) + { +- unsigned int s = (*swizzle >> (i * 8)) & 0xff; +- unsigned int x = s & 0xf, y = (s >> 4) & 0xf; ++ unsigned int x = swizzle->components[i].x; ++ unsigned int y = swizzle->components[i].y; + unsigned int idx = x + y * 4; +- new_swizzle |= s << (bit++ * 8); ++ ++ new_swizzle.components[bit++] = swizzle->components[i]; + if (new_writemask & (1 << idx)) + return false; + new_writemask |= 1 << idx; +@@ -2142,22 +2136,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un + } + width = bit; + +- /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the +- * incoming vector. */ ++ /* Then we invert the swizzle. The resulting swizzle uses a uint32_t ++ * vector format, because it's for the incoming vector. 
*/ + bit = 0; + for (i = 0; i < 16; ++i) + { + for (j = 0; j < width; ++j) + { +- unsigned int s = (new_swizzle >> (j * 8)) & 0xff; +- unsigned int x = s & 0xf, y = (s >> 4) & 0xf; ++ unsigned int x = new_swizzle.components[j].x; ++ unsigned int y = new_swizzle.components[j].y; + unsigned int idx = x + y * 4; + if (idx == i) +- inverted |= j << (bit++ * 2); ++ hlsl_swizzle_set_component(&inverted, bit++, j); + } + } + +- *swizzle = inverted; ++ *ret_inverted = inverted; + *writemask = new_writemask; + *ret_width = width; + return true; +@@ -2211,28 +2205,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc + { + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); + struct hlsl_ir_node *new_swizzle; +- uint32_t s = swizzle->swizzle; ++ uint32_t s; + + VKD3D_ASSERT(!matrix_writemask); + + if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) + { ++ struct hlsl_matrix_swizzle ms = swizzle->u.matrix; ++ + if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) + { + hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); + return false; + } +- if (!invert_swizzle_matrix(&s, &writemask, &width)) ++ if (!invert_swizzle_matrix(&ms, &s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); + return false; + } + matrix_writemask = true; + } +- else if (!invert_swizzle(&s, &writemask, &width)) ++ else + { +- hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); +- return false; ++ s = swizzle->u.vector; ++ if (!invert_swizzle(&s, &writemask, &width)) ++ { ++ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); ++ return false; ++ } + } + + if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, &swizzle->node.loc))) +@@ -2670,26 +2670,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + { + 
hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Only innermost array size can be implicit."); +- v->initializer.args_count = 0; ++ type = ctx->builtin_types.error; ++ break; + } + else if (elem_components == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot declare an implicit size array of a size 0 type."); +- v->initializer.args_count = 0; ++ type = ctx->builtin_types.error; ++ break; + } + else if (size == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays need to be initialized."); +- v->initializer.args_count = 0; ++ type = ctx->builtin_types.error; ++ break; + } + else if (size % elem_components != 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Cannot initialize implicit size array with %u components, expected a multiple of %u.", + size, elem_components); +- v->initializer.args_count = 0; ++ type = ctx->builtin_types.error; ++ break; + } + else + { +@@ -2908,7 +2912,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + v->initializer.args[0] = node_from_block(v->initializer.instrs); + } + +- initialize_var(ctx, var, &v->initializer, is_default_values_initializer); ++ if (var->data_type->class != HLSL_CLASS_ERROR) ++ initialize_var(ctx, var, &v->initializer, is_default_values_initializer); + + if (is_default_values_initializer) + { +@@ -2993,13 +2998,137 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, const struct hlsl_ir_ + return true; + } + ++static enum hlsl_base_type hlsl_base_type_class(enum hlsl_base_type t) ++{ ++ switch (t) ++ { ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_DOUBLE: ++ return HLSL_TYPE_FLOAT; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return HLSL_TYPE_INT; ++ ++ case HLSL_TYPE_BOOL: ++ return HLSL_TYPE_BOOL; ++ } ++ ++ return 0; ++} ++ ++static unsigned int hlsl_base_type_width(enum hlsl_base_type t) ++{ ++ switch (t) ++ { ++ 
case HLSL_TYPE_HALF: ++ return 16; ++ ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ return 32; ++ ++ case HLSL_TYPE_DOUBLE: ++ return 64; ++ } ++ ++ return 0; ++} ++ ++static int function_parameter_compare(const struct hlsl_ir_var *candidate, ++ const struct hlsl_ir_var *ref, const struct hlsl_ir_node *arg) ++{ ++ struct ++ { ++ enum hlsl_base_type type; ++ enum hlsl_base_type class; ++ unsigned int count, width; ++ } c, r, a; ++ int ret; ++ ++ /* TODO: Non-numeric types. */ ++ if (!hlsl_is_numeric_type(arg->data_type)) ++ return 0; ++ ++ c.type = candidate->data_type->e.numeric.type; ++ c.class = hlsl_base_type_class(c.type); ++ c.count = hlsl_type_component_count(candidate->data_type); ++ c.width = hlsl_base_type_width(c.type); ++ ++ r.type = ref->data_type->e.numeric.type; ++ r.class = hlsl_base_type_class(r.type); ++ r.count = hlsl_type_component_count(ref->data_type); ++ r.width = hlsl_base_type_width(r.type); ++ ++ a.type = arg->data_type->e.numeric.type; ++ a.class = hlsl_base_type_class(a.type); ++ a.count = hlsl_type_component_count(arg->data_type); ++ a.width = hlsl_base_type_width(a.type); ++ ++ /* Prefer candidates without component count narrowing. E.g., given an ++ * float4 argument, half4 is a better match than float2. */ ++ if ((ret = (a.count > r.count) - (a.count > c.count))) ++ return ret; ++ ++ /* Prefer candidates with matching component type classes. E.g., given a ++ * float argument, double is a better match than int. */ ++ if ((ret = (a.class == c.class) - (a.class == r.class))) ++ return ret; ++ ++ /* Prefer candidates with matching component types. E.g., given an int ++ * argument, int4 is a better match than uint4. */ ++ if ((ret = (a.type == c.type) - (a.type == r.type))) ++ return ret; ++ ++ /* Prefer candidates without component type narrowing. E.g., given a float ++ * argument, double is a better match than half. 
*/ ++ if ((ret = (a.width > r.width) - (a.width > c.width))) ++ return ret; ++ ++ /* Prefer candidates without component count widening. E.g. given a float ++ * argument, float is a better match than float2. */ ++ return (a.count < r.count) - (a.count < c.count); ++} ++ ++static int function_compare(const struct hlsl_ir_function_decl *candidate, ++ const struct hlsl_ir_function_decl *ref, const struct parse_initializer *args) ++{ ++ bool any_worse = false, any_better = false; ++ unsigned int i; ++ int ret; ++ ++ for (i = 0; i < args->args_count; ++i) ++ { ++ ret = function_parameter_compare(candidate->parameters.vars[i], ref->parameters.vars[i], args->args[i]); ++ if (ret < 0) ++ any_worse = true; ++ else if (ret > 0) ++ any_better = true; ++ } ++ ++ /* We consider a candidate better if at least one parameter is a better ++ * match, and none are a worse match. */ ++ return any_better - any_worse; ++} ++ + static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, + const char *name, const struct parse_initializer *args, bool is_compile, + const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_function_decl *decl, *compatible_match = NULL; ++ struct hlsl_ir_function_decl *decl; ++ struct vkd3d_string_buffer *s; + struct hlsl_ir_function *func; + struct rb_entry *entry; ++ int compare; ++ size_t i; ++ struct ++ { ++ struct hlsl_ir_function_decl **candidates; ++ size_t count, capacity; ++ } candidates = {0}; + + if (!(entry = rb_get(&ctx->functions, name))) + return NULL; +@@ -3007,18 +3136,58 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, + + LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) + { +- if (func_is_compatible_match(ctx, decl, is_compile, args)) ++ if (!func_is_compatible_match(ctx, decl, is_compile, args)) ++ continue; ++ ++ if (candidates.count) + { +- if (compatible_match) ++ compare = function_compare(decl, candidates.candidates[0], args); ++ ++ /* The candidate 
is worse; skip it. */ ++ if (compare < 0) ++ continue; ++ ++ /* The candidate is better; replace the current candidates. */ ++ if (compare > 0) + { +- hlsl_fixme(ctx, loc, "Prioritize between multiple compatible function overloads."); +- break; ++ candidates.candidates[0] = decl; ++ candidates.count = 1; ++ continue; ++ } ++ } ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates, ++ &candidates.capacity, candidates.count + 1, sizeof(decl)))) ++ { ++ vkd3d_free(candidates.candidates); ++ return NULL; ++ } ++ candidates.candidates[candidates.count++] = decl; ++ } ++ ++ if (!candidates.count) ++ return NULL; ++ ++ if (candidates.count > 1) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL, "Ambiguous function call."); ++ if ((s = hlsl_get_string_buffer(ctx))) ++ { ++ hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, "Candidates are:"); ++ for (i = 0; i < candidates.count; ++i) ++ { ++ hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]); ++ hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer); ++ vkd3d_string_buffer_clear(s); + } +- compatible_match = decl; ++ hlsl_release_string_buffer(ctx, s); + } + } + +- return compatible_match; ++ decl = candidates.candidates[0]; ++ vkd3d_free(candidates.candidates); ++ ++ return decl; + } + + static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +@@ -5447,6 +5616,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + ++ if (!hlsl_is_numeric_type(type)) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Constructor data type %s is not numeric.", string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ return NULL; ++ } ++ + if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) + return NULL; + +@@ -6553,6 
+6733,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct hlsl_semantic semantic; + enum hlsl_buffer_type buffer_type; + enum hlsl_sampler_dim sampler_dim; ++ enum hlsl_so_object_type so_type; + struct hlsl_attribute *attr; + struct parse_attribute_list attr_list; + struct hlsl_ir_switch_case *switch_case; +@@ -6596,6 +6777,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_INLINE + %token KW_INOUT + %token KW_LINEAR ++%token KW_LINESTREAM + %token KW_MATRIX + %token KW_NAMESPACE + %token KW_NOINTERPOLATION +@@ -6605,6 +6787,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER ++%token KW_POINTSTREAM + %token KW_RASTERIZERORDEREDBUFFER + %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER + %token KW_RASTERIZERORDEREDTEXTURE1D +@@ -6654,6 +6837,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_TEXTURE3D + %token KW_TEXTURECUBE + %token KW_TEXTURECUBEARRAY ++%token KW_TRIANGLESTREAM + %token KW_TRUE + %token KW_TYPEDEF + %token KW_UNSIGNED +@@ -6784,6 +6968,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type semantic + ++%type so_type ++ + %type state_block + + %type state_block_index_opt +@@ -7684,7 +7870,10 @@ parameter_decl: + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays not allowed in function parameters."); ++ type = ctx->builtin_types.error; ++ break; + } ++ + type = hlsl_new_array_type(ctx, type, $4.sizes[i]); + } + vkd3d_free($4.sizes); +@@ -7805,6 +7994,20 @@ rov_type: + $$ = HLSL_SAMPLER_DIM_3D; + } + ++so_type: ++ KW_POINTSTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; ++ } ++ | KW_LINESTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; ++ } ++ | KW_TRIANGLESTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; ++ } ++ + resource_format: + 
var_modifiers type + { +@@ -7948,6 +8151,10 @@ type_no_void: + validate_uav_type(ctx, $1, $3, &@4); + $$ = hlsl_new_uav_type(ctx, $1, $3, true); + } ++ | so_type '<' type '>' ++ { ++ $$ = hlsl_new_stream_output_type(ctx, $1, $3); ++ } + | KW_RWBYTEADDRESSBUFFER + { + $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); +@@ -8088,14 +8295,9 @@ typedef: + } + + if (modifiers) +- { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Storage modifiers are not allowed on typedefs."); +- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, $4, struct parse_variable_def, entry) +- vkd3d_free(v); +- vkd3d_free($4); +- YYABORT; +- } ++ + if (!add_typedef(ctx, type, $4)) + YYABORT; + } +@@ -8753,25 +8955,25 @@ if_body: + loop_statement: + attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement + { +- $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); ++ $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); + hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); + } + | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' + { +- $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); ++ $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); + hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); + } + | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement + { +- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); ++ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); + } + | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement + { +- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); ++ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); + hlsl_pop_scope(ctx); + cleanup_parse_attribute_list(&$1); + } +@@ 
-8979,17 +9181,24 @@ primary_expr: + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + +- if (!(var = hlsl_get_var(ctx->cur_scope, $1))) ++ if ((var = hlsl_get_var(ctx->cur_scope, $1))) ++ { ++ vkd3d_free($1); ++ ++ if (!(load = hlsl_new_var_load(ctx, var, &@1))) ++ YYABORT; ++ if (!($$ = make_block(ctx, &load->node))) ++ YYABORT; ++ } ++ else + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); + vkd3d_free($1); +- YYABORT; ++ ++ if (!($$ = make_empty_block(ctx))) ++ YYABORT; ++ $$->value = ctx->error_instr; + } +- vkd3d_free($1); +- if (!(load = hlsl_new_var_load(ctx, var, &@1))) +- YYABORT; +- if (!($$ = make_block(ctx, &load->node))) +- YYABORT; + } + | '(' expr ')' + { +@@ -9149,23 +9358,8 @@ postfix_expr: + | var_modifiers type '(' initializer_expr_list ')' + { + if ($1) +- { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on constructors."); +- free_parse_initializer(&$4); +- YYABORT; +- } +- if (!hlsl_is_numeric_type($2)) +- { +- struct vkd3d_string_buffer *string; +- +- if ((string = hlsl_type_to_string(ctx, $2))) +- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Constructor data type %s is not numeric.", string->buffer); +- hlsl_release_string_buffer(ctx, string); +- free_parse_initializer(&$4); +- YYABORT; +- } + + if (!($$ = add_constructor(ctx, $2, &$4, &@2))) + { +@@ -9233,11 +9427,8 @@ unary_expr: + | '(' var_modifiers type arrays ')' unary_expr + { + if ($2) +- { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on casts."); +- YYABORT; +- } + + if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) + { +@@ -9381,10 +9572,7 @@ assignment_expr: + struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); + + if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) +- { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); +- 
YYABORT; +- } + hlsl_block_add_block($3, $1); + destroy_block($1); + if (!add_assignment(ctx, $3, lhs, $2, rhs)) +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index d11ff481f6b..a43ea53089e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -19,6 +19,7 @@ + */ + + #include "hlsl.h" ++#include "vkd3d_shader_private.h" + #include + #include + +@@ -1075,7 +1076,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + struct hlsl_deref var_deref; + struct hlsl_type *matrix_type; + struct hlsl_ir_var *var; +- unsigned int x, y, k, i; ++ unsigned int k, i; + + if (instr->type != HLSL_IR_SWIZZLE) + return false; +@@ -1093,9 +1094,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins + struct hlsl_block store_block; + struct hlsl_ir_node *load; + +- y = (swizzle->swizzle >> (8 * i + 4)) & 0xf; +- x = (swizzle->swizzle >> 8 * i) & 0xf; +- k = y * matrix_type->dimx + x; ++ k = swizzle->u.matrix.components[i].y * matrix_type->dimx + swizzle->u.matrix.components[i].x; + + if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) + return false; +@@ -1358,8 +1357,10 @@ struct copy_propagation_var_def + + struct copy_propagation_state + { +- struct rb_tree var_defs; +- struct copy_propagation_state *parent; ++ struct rb_tree *scope_var_defs; ++ size_t scope_count, scopes_capacity; ++ struct hlsl_ir_node *stop; ++ bool stopped; + }; + + static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) +@@ -1381,6 +1382,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte + vkd3d_free(var_def); + } + ++static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx) ++{ ++ if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity, ++ 
state->scope_count + 1, sizeof(*state->scope_var_defs)))) ++ return false; ++ ++ rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare); ++ ++ return state->scope_count; ++} ++ ++static size_t copy_propagation_pop_scope(struct copy_propagation_state *state) ++{ ++ rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL); ++ ++ return state->scope_count; ++} ++ ++static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx) ++{ ++ memset(state, 0, sizeof(*state)); ++ ++ return copy_propagation_push_scope(state, ctx); ++} ++ ++static void copy_propagation_state_destroy(struct copy_propagation_state *state) ++{ ++ while (copy_propagation_pop_scope(state)); ++ ++ vkd3d_free(state->scope_var_defs); ++} ++ + static struct copy_propagation_value *copy_propagation_get_value_at_time( + struct copy_propagation_component_trace *trace, unsigned int time) + { +@@ -1398,9 +1431,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( + static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, + const struct hlsl_ir_var *var, unsigned int component, unsigned int time) + { +- for (; state; state = state->parent) ++ for (size_t i = state->scope_count - 1; i < state->scope_count; i--) + { +- struct rb_entry *entry = rb_get(&state->var_defs, var); ++ struct rb_tree *tree = &state->scope_var_defs[i]; ++ struct rb_entry *entry = rb_get(tree, var); + if (entry) + { + struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); +@@ -1426,7 +1460,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co + static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, + struct copy_propagation_state *state, struct hlsl_ir_var *var) + { +- struct rb_entry *entry = rb_get(&state->var_defs, var); ++ struct 
rb_tree *tree = &state->scope_var_defs[state->scope_count - 1]; ++ struct rb_entry *entry = rb_get(tree, var); + struct copy_propagation_var_def *var_def; + unsigned int component_count = hlsl_type_component_count(var->data_type); + int res; +@@ -1439,7 +1474,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h + + var_def->var = var; + +- res = rb_put(&state->var_defs, var, &var_def->entry); ++ res = rb_put(tree, var, &var_def->entry); + VKD3D_ASSERT(!res); + + return var_def; +@@ -1596,7 +1631,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, + var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count)); + return false; + } +- ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i); ++ hlsl_swizzle_set_component(&ret_swizzle, i, value->component); + } + + TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", +@@ -1678,6 +1713,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + +@@ -1719,10 +1755,10 @@ static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, + return false; + load = hlsl_ir_load(swizzle->val.node); + +- if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node)) ++ if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node)) + return true; + +- if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node)) ++ if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) + return true; + + return false; +@@ -1818,18 +1854,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s + } + } + +-static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct 
copy_propagation_state *state, +- struct copy_propagation_state *parent) +-{ +- rb_init(&state->var_defs, copy_propagation_var_def_compare); +- state->parent = parent; +-} +- +-static void copy_propagation_state_destroy(struct copy_propagation_state *state) +-{ +- rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL); +-} +- + static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, + struct hlsl_block *block, unsigned int time) + { +@@ -1898,16 +1922,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b + static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, + struct copy_propagation_state *state) + { +- struct copy_propagation_state inner_state; + bool progress = false; + +- copy_propagation_state_init(ctx, &inner_state, state); +- progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); +- copy_propagation_state_destroy(&inner_state); ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &iff->then_block, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); + +- copy_propagation_state_init(ctx, &inner_state, state); +- progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); +- copy_propagation_state_destroy(&inner_state); ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &iff->else_block, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); + + /* Ideally we'd invalidate the outer state looking at what was + * touched in the two inner states, but this doesn't work for +@@ -1922,14 +1949,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if + static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, + struct copy_propagation_state *state) + { +- struct 
copy_propagation_state inner_state; + bool progress = false; + + copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); ++ copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index); + +- copy_propagation_state_init(ctx, &inner_state, state); +- progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); +- copy_propagation_state_destroy(&inner_state); ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &loop->body, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); + + return progress; + } +@@ -1937,15 +1966,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l + static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, + struct copy_propagation_state *state) + { +- struct copy_propagation_state inner_state; + struct hlsl_ir_switch_case *c; + bool progress = false; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { +- copy_propagation_state_init(ctx, &inner_state, state); +- progress |= copy_propagation_transform_block(ctx, &c->body, &inner_state); +- copy_propagation_state_destroy(&inner_state); ++ copy_propagation_push_scope(state, ctx); ++ progress |= copy_propagation_transform_block(ctx, &c->body, state); ++ if (state->stopped) ++ return progress; ++ copy_propagation_pop_scope(state); + } + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) +@@ -1964,6 +1994,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { ++ if (instr == state->stop) ++ { ++ state->stopped = true; ++ return progress; ++ } ++ + switch (instr->type) + { + case HLSL_IR_LOAD: +@@ -2001,6 +2037,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b + default: + break; + } ++ ++ if 
(state->stopped) ++ return progress; + } + + return progress; +@@ -2013,7 +2052,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc + + index_instructions(block, 2); + +- copy_propagation_state_init(ctx, &state, NULL); ++ copy_propagation_state_init(&state, ctx); + + progress = copy_propagation_transform_block(ctx, block, &state); + +@@ -2401,8 +2440,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + struct hlsl_ir_node *new_swizzle; + uint32_t combined_swizzle; + +- combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, +- swizzle->swizzle, instr->data_type->dimx); ++ combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, ++ swizzle->u.vector, instr->data_type->dimx); + next_instr = hlsl_ir_swizzle(next_instr)->val.node; + + if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) +@@ -2429,7 +2468,7 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i + return false; + + for (i = 0; i < instr->data_type->dimx; ++i) +- if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i) ++ if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i) + return false; + + hlsl_replace_node(instr, swizzle->val.node); +@@ -2788,6 +2827,108 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n + + return true; + } ++ ++static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type) ++{ ++ struct hlsl_type *sampler_type; ++ ++ if (type->class == HLSL_CLASS_ARRAY) ++ { ++ if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type))) ++ return NULL; ++ ++ return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count); ++ } ++ ++ return ctx->builtin_types.sampler[type->sampler_dim]; ++} ++ ++static bool deref_offset_is_zero(struct hlsl_ctx *ctx, const struct 
hlsl_deref *deref) ++{ ++ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); ++ unsigned int index; ++ ++ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) ++ return false; ++ return index == 0; ++} ++ ++/* Lower samples from separate texture and sampler variables to samples from ++ * synthetized combined samplers. That is, translate SM4-style samples in the ++ * source to SM1-style samples in the bytecode. */ ++static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_var *var, *resource, *sampler; ++ struct hlsl_ir_resource_load *load; ++ struct vkd3d_string_buffer *name; ++ struct hlsl_type *sampler_type; ++ ++ if (instr->type != HLSL_IR_RESOURCE_LOAD) ++ return false; ++ load = hlsl_ir_resource_load(instr); ++ ++ if (load->load_type != HLSL_RESOURCE_SAMPLE ++ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD ++ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) ++ return false; ++ ++ if (!load->sampler.var) ++ return false; ++ resource = load->resource.var; ++ sampler = load->sampler.var; ++ ++ VKD3D_ASSERT(hlsl_type_is_resource(resource->data_type)); ++ VKD3D_ASSERT(hlsl_type_is_resource(sampler->data_type)); ++ if (sampler->data_type->class == HLSL_CLASS_ARRAY && !deref_offset_is_zero(ctx, &load->sampler)) ++ { ++ /* Not supported by d3dcompiler. 
*/ ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, ++ "Lower separated samples with sampler arrays."); ++ return false; ++ } ++ if (!resource->is_uniform) ++ return false; ++ if(!sampler->is_uniform) ++ return false; ++ ++ if (!(name = hlsl_get_string_buffer(ctx))) ++ return false; ++ vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name); ++ ++ TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer)); ++ ++ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) ++ { ++ if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type))) ++ { ++ hlsl_release_string_buffer(ctx, name); ++ return false; ++ } ++ ++ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, sampler_type, &instr->loc, false))) ++ { ++ hlsl_release_string_buffer(ctx, name); ++ return false; ++ } ++ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; ++ var->is_combined_sampler = true; ++ var->is_uniform = 1; ++ ++ list_remove(&var->scope_entry); ++ list_add_after(&sampler->scope_entry, &var->scope_entry); ++ ++ list_add_after(&sampler->extern_entry, &var->extern_entry); ++ } ++ hlsl_release_string_buffer(ctx, name); ++ ++ /* Only change the deref's var, keep the path. */ ++ load->resource.var = var; ++ hlsl_cleanup_deref(&load->sampler); ++ load->sampler.var = NULL; ++ ++ return true; ++} ++ + /* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ + static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -2899,6 +3040,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl + list_add_tail(list, &to_add->extern_entry); + } + ++static bool sort_synthetic_combined_samplers_first(struct hlsl_ctx *ctx) ++{ ++ struct list separated_resources; ++ struct hlsl_ir_var *var, *next; ++ ++ list_init(&separated_resources); ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_combined_sampler) ++ { ++ list_remove(&var->extern_entry); ++ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_SAMPLERS); ++ } ++ } ++ ++ list_move_head(&ctx->extern_vars, &separated_resources); ++ ++ return false; ++} ++ + static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) + { + struct list separated_resources; +@@ -4162,9 +4324,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ +- vkd3d_unreachable(); + } + + return false; +@@ -4304,9 +4463,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ +- vkd3d_unreachable(); + + case HLSL_IR_STORE: + { +@@ -4494,6 +4650,9 @@ struct register_allocator + + /* Two allocations with different mode can't share the same register. */ + int mode; ++ /* If an allocation is VIP, no new allocations can be made in the ++ * register unless they are VIP as well. 
*/ ++ bool vip; + } *allocations; + size_t count, capacity; + +@@ -4513,7 +4672,7 @@ struct register_allocator + }; + + static unsigned int get_available_writemask(const struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) ++ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) + { + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i; +@@ -4532,6 +4691,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all + writemask &= ~allocation->writemask; + if (allocation->mode != mode) + writemask = 0; ++ if (allocation->vip && !vip) ++ writemask = 0; + } + + if (!writemask) +@@ -4542,7 +4703,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all + } + + static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, +- unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) ++ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) + { + struct allocation *allocation; + +@@ -4556,16 +4717,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a + allocation->first_write = first_write; + allocation->last_read = last_read; + allocation->mode = mode; ++ allocation->vip = vip; + + allocator->reg_count = max(allocator->reg_count, reg_idx + 1); + } + +-/* reg_size is the number of register components to be reserved, while component_count is the number +- * of components for the register's writemask. In SM1, floats and vectors allocate the whole +- * register, even if they don't use it completely. */ ++/* Allocates a register (or some components of it) within the register allocator. ++ * 'reg_size' is the number of register components to be reserved. ++ * 'component_count' is the number of components for the hlsl_reg's ++ * writemask, which can be smaller than 'reg_size'. 
For instance, sm1 ++ * floats and vectors allocate the whole register even if they are not ++ * using all components. ++ * 'mode' can be provided to avoid allocating on a register that already has an ++ * allocation with a different mode. ++ * 'force_align' can be used so that the allocation always start in '.x'. ++ * 'vip' can be used so that no new allocations can be made in the given register ++ * unless they are 'vip' as well. */ + static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, unsigned int reg_size, +- unsigned int component_count, int mode, bool force_align) ++ unsigned int component_count, int mode, bool force_align, bool vip) + { + struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; + unsigned int required_size = force_align ? 4 : reg_size; +@@ -4579,7 +4749,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) + { + unsigned int available_writemask = get_available_writemask(allocator, +- first_write, last_read, reg_idx, mode); ++ first_write, last_read, reg_idx, mode, vip); + + if (vkd3d_popcount(available_writemask) >= pref) + { +@@ -4589,7 +4759,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + ret.id = reg_idx; + ret.writemask = hlsl_combine_writemasks(writemask, + vkd3d_write_mask_from_component_count(component_count)); +- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); ++ ++ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); + return ret; + } + } +@@ -4598,13 +4769,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + ret.id = allocator->reg_count; + ret.writemask = vkd3d_write_mask_from_component_count(component_count); + record_allocation(ctx, allocator, allocator->reg_count, +- 
vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); ++ vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); + return ret; + } + + /* Allocate a register with writemask, while reserving reg_writemask. */ +-static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) ++static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, ++ struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, ++ uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4614,11 +4786,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + for (reg_idx = 0;; ++reg_idx) + { + if ((get_available_writemask(allocator, first_write, last_read, +- reg_idx, mode) & reg_writemask) == reg_writemask) ++ reg_idx, mode, vip) & reg_writemask) == reg_writemask) + break; + } + +- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); ++ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); + + ret.id = reg_idx; + ret.allocation_size = 1; +@@ -4628,7 +4800,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + } + + static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, +- unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) ++ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) + { + unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; + unsigned int writemask; +@@ -4636,18 +4808,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig + + for (i = 0; i < (reg_size / 4); ++i) + { +- writemask = 
get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip); + if (writemask != VKD3DSP_WRITEMASK_ALL) + return false; + } +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); + if ((writemask & last_reg_mask) != last_reg_mask) + return false; + return true; + } + + static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) ++ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4655,15 +4827,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + + for (reg_idx = 0;; ++reg_idx) + { +- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) ++ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) + break; + } + + for (i = 0; i < reg_size / 4; ++i) +- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); ++ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); + if (reg_size % 4) + record_allocation(ctx, allocator, reg_idx + (reg_size / 4), +- (1u << (reg_size % 4)) - 1, first_write, last_read, mode); ++ (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); + + ret.id = reg_idx; + ret.allocation_size = align(reg_size, 4) / 4; +@@ -4679,9 +4851,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ + + if (type->class <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); ++ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); + else +- return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); ++ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); + } + + static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +@@ -4859,8 +5031,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + } + + if (reg_writemask) +- instr->reg = allocate_register_with_masks(ctx, allocator, +- instr->index, instr->last_read, reg_writemask, dst_writemask, 0); ++ instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, ++ instr->last_read, reg_writemask, dst_writemask, 0, false); + else + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, + instr->index, instr->last_read, instr->data_type); +@@ -5084,7 +5256,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + } + } + +-static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) ++static void sort_uniform_by_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort, enum hlsl_regset regset) + { + struct hlsl_ir_var *var; + +@@ -5092,8 +5264,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ + + LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) + { +- uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; +- uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; ++ uint32_t to_sort_size = to_sort->bind_count[regset]; ++ uint32_t var_size = var->bind_count[regset]; + + if (to_sort_size > var_size) + { +@@ -5105,7 +5277,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ + list_add_tail(sorted, &to_sort->extern_entry); + } + 
+-static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) ++static void sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset regset) + { + struct list sorted = LIST_INIT(sorted); + struct hlsl_ir_var *var, *next; +@@ -5113,7 +5285,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) + LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_uniform) +- sort_uniform_by_numeric_bind_count(&sorted, var); ++ sort_uniform_by_bind_count(&sorted, var, regset); + } + list_move_tail(&ctx->extern_vars, &sorted); + } +@@ -5161,7 +5333,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + struct register_allocator allocator = {0}; + struct hlsl_ir_var *var; + +- sort_uniforms_by_numeric_bind_count(ctx); ++ sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC); + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +@@ -5181,14 +5353,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + { + if (i < bind_count) + { +- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) ++ if (get_available_writemask(&allocator_used, 1, UINT_MAX, ++ reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } +- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); ++ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + } +- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); ++ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + } + + var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; +@@ -5211,7 +5384,7 @@ static void allocate_const_registers(struct hlsl_ctx 
*ctx, struct hlsl_ir_functi + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } +@@ -5254,7 +5427,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + var = entry_func->parameters.vars[i]; + if (var->is_output_semantic) + { +- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); ++ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, ++ var->first_write, var->last_read, 0, false); + break; + } + } +@@ -5311,6 +5485,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + enum vkd3d_shader_register_type type; + struct vkd3d_shader_version version; ++ bool special_interpolation = false; ++ bool vip_allocation = false; + uint32_t reg; + bool builtin; + +@@ -5363,6 +5539,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + * domains, it is allocated as if it was 'float[1]'. */ + var->force_align = true; + } ++ ++ if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX ++ || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX ++ || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) ++ vip_allocation = true; ++ ++ if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) ++ special_interpolation = true; + } + + if (builtin) +@@ -5376,8 +5560,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); + unsigned int reg_size = optimize ? 
var->data_type->dimx : 4; + +- var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, +- UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); ++ if (special_interpolation) ++ mode = VKD3DSIM_NONE; ++ ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, ++ reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); + + TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', + var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); +@@ -6419,6 +6606,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); + progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); + progress |= hlsl_copy_propagation_execute(ctx, body); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); +@@ -6636,7 +6824,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d + + swizzle = hlsl_swizzle_from_writemask(src_writemask); + swizzle = hlsl_map_swizzle(swizzle, dst_writemask); +- swizzle = vsir_swizzle_from_hlsl(swizzle); + return swizzle; + } + +@@ -6812,7 +6999,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src + } + + static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, +- struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) ++ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) + { + struct hlsl_ir_constant *constant; + +@@ -6832,6 +7019,242 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, + } + } + ++static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register 
*reg, uint32_t *writemask, const struct hlsl_deref *deref) ++{ ++ const struct hlsl_ir_var *var = deref->var; ++ unsigned int offset_const_deref; ++ ++ reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; ++ reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ ++ if (!var->indexable) ++ { ++ offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx[0].offset += offset_const_deref / 4; ++ reg->idx_count = 1; ++ } ++ else ++ { ++ offset_const_deref = deref->const_offset; ++ reg->idx[1].offset = offset_const_deref / 4; ++ reg->idx_count = 2; ++ ++ if (deref->rel_offset.node) ++ { ++ struct vkd3d_shader_src_param *idx_src; ++ ++ if (!(idx_src = vsir_program_get_src_params(program, 1))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return false; ++ } ++ memset(idx_src, 0, sizeof(*idx_src)); ++ reg->idx[1].rel_addr = idx_src; ++ ++ vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ } ++ ++ *writemask = 0xf & (0xf << (offset_const_deref % 4)); ++ if (var->regs[HLSL_REGSET_NUMERIC].writemask) ++ *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); ++ return true; ++} ++ ++static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); ++ const struct hlsl_ir_var *var = deref->var; ++ ++ if (var->is_uniform) ++ { ++ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); ++ ++ if (regset == HLSL_REGSET_TEXTURES) ++ { ++ reg->type = VKD3DSPR_RESOURCE; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = 
var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else if (regset == HLSL_REGSET_UAVS) ++ { ++ reg->type = VKD3DSPR_UAV; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else if (regset == HLSL_REGSET_SAMPLERS) ++ { ++ reg->type = VKD3DSPR_SAMPLER; ++ reg->dimension = VSIR_DIMENSION_NONE; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; ++ ++ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); ++ reg->type = VKD3DSPR_CONSTBUFFER; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: 
array index */ ++ reg->idx[2].offset = offset / 4; ++ reg->idx_count = 3; ++ } ++ else ++ { ++ reg->idx[0].offset = var->buffer->reg.index; ++ reg->idx[1].offset = offset / 4; ++ reg->idx_count = 2; ++ } ++ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); ++ } ++ } ++ else if (var->is_input_semantic) ++ { ++ bool has_idx; ++ ++ if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx)) ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ ++ if (has_idx) ++ { ++ reg->idx[0].offset = var->semantic.index + offset / 4; ++ reg->idx_count = 1; ++ } ++ ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ } ++ else ++ { ++ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); ++ ++ VKD3D_ASSERT(hlsl_reg.allocated); ++ ++ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ reg->type = VKD3DSPR_PATCHCONST; ++ else ++ reg->type = VKD3DSPR_INPUT; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ reg->idx[0].offset = hlsl_reg.id; ++ reg->idx_count = 1; ++ *writemask = hlsl_reg.writemask; ++ } ++ } ++ else if (var->is_output_semantic) ++ { ++ bool has_idx; ++ ++ if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ ++ if (has_idx) ++ { ++ reg->idx[0].offset = var->semantic.index + offset / 4; ++ reg->idx_count = 1; ++ } ++ ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ } ++ else ++ { ++ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); ++ ++ VKD3D_ASSERT(hlsl_reg.allocated); ++ reg->type = VKD3DSPR_OUTPUT; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ reg->idx[0].offset = hlsl_reg.id; ++ reg->idx_count = 
1; ++ *writemask = hlsl_reg.writemask; ++ } ++ } ++ else ++ { ++ return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, ++ unsigned int dst_writemask, const struct vkd3d_shader_location *loc) ++{ ++ uint32_t writemask; ++ ++ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) ++ return false; ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ return true; ++} ++ ++static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, ++ const struct vkd3d_shader_location *loc, unsigned int writemask) ++{ ++ uint32_t reg_writemask; ++ ++ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref)) ++ return false; ++ dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); ++ return true; ++} ++ + static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) + { +@@ -7059,13 +7482,10 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + +- case HLSL_TYPE_BOOL: +- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); +- break; +- + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); + break; +@@ -7472,9 +7892,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + dst_param->write_mask = instr->reg.writemask; + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); +- 
swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); ++ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->dimx); + swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); +- swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; + VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); +@@ -7624,31 +8043,20 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo + } + + static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +- uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) ++ uint64_t config_flags, struct vsir_program *program) + { + struct vkd3d_shader_version version = {0}; +- struct vkd3d_bytecode_buffer buffer = {0}; + struct hlsl_block block; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + +- write_sm1_uniforms(ctx, &buffer); +- if (buffer.status) +- { +- vkd3d_free(buffer.data); +- ctx->result = buffer.status; +- return; +- } +- ctab->code = buffer.data; +- ctab->size = buffer.size; +- + generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); +@@ -7659,38 +8067,401 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + sm1_generate_vsir_block(ctx, &entry_func->body, program); + } + +-static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) ++D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + { +- struct vkd3d_shader_location *loc; +- struct hlsl_ir_node *vsir_instr; +- +- loc = 
&program->instructions.elements[program->instructions.count - 1].location; +- +- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) ++ switch (type->class) + { +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; +- } +- hlsl_block_add_instr(block, vsir_instr); ++ case HLSL_CLASS_ARRAY: ++ return hlsl_sm1_class(type->e.array.type); ++ case HLSL_CLASS_MATRIX: ++ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) ++ return D3DXPC_MATRIX_COLUMNS; ++ else ++ return D3DXPC_MATRIX_ROWS; ++ case HLSL_CLASS_SCALAR: ++ return D3DXPC_SCALAR; ++ case HLSL_CLASS_STRUCT: ++ return D3DXPC_STRUCT; ++ case HLSL_CLASS_VECTOR: ++ return D3DXPC_VECTOR; ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_VERTEX_SHADER: ++ return D3DXPC_OBJECT; ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_ERROR: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RASTERIZER_STATE: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: ++ case HLSL_CLASS_NULL: ++ break; ++ } ++ ++ vkd3d_unreachable(); + } + +-static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, +- struct vsir_program *program, struct hlsl_ir_node *instr) ++D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler) + { +- struct vkd3d_shader_location *loc; +- struct hlsl_ir_node *vsir_instr; ++ enum hlsl_type_class class = type->class; + +- loc = &program->instructions.elements[program->instructions.count - 
1].location; ++ if (is_combined_sampler) ++ class = HLSL_CLASS_TEXTURE; + +- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, +- program->instructions.count - 1, instr->data_type, &instr->reg, loc))) ++ switch (class) + { +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ switch (type->e.numeric.type) ++ { ++ case HLSL_TYPE_BOOL: ++ return D3DXPT_BOOL; ++ /* Actually double behaves differently depending on DLL version: ++ * For <= 36, it maps to D3DXPT_FLOAT. ++ * For 37-40, it maps to zero (D3DXPT_VOID). ++ * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_* ++ * values are mostly compatible with D3DXPT_*). ++ * However, the latter two cases look like bugs, and a reasonable ++ * application certainly wouldn't know what to do with them. ++ * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */ ++ case HLSL_TYPE_DOUBLE: ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ return D3DXPT_FLOAT; ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ return D3DXPT_INT; ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ case HLSL_CLASS_SAMPLER: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3DXPT_SAMPLER1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3DXPT_SAMPLER2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3DXPT_SAMPLER3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3DXPT_SAMPLERCUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3DXPT_SAMPLER; ++ default: ++ ERR("Invalid dimension %#x.\n", type->sampler_dim); ++ vkd3d_unreachable(); ++ } ++ break; ++ ++ case HLSL_CLASS_TEXTURE: ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return D3DXPT_TEXTURE1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return D3DXPT_TEXTURE2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return D3DXPT_TEXTURE3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return D3DXPT_TEXTURECUBE; ++ case HLSL_SAMPLER_DIM_GENERIC: ++ return D3DXPT_TEXTURE; ++ default: ++ ERR("Invalid dimension %#x.\n", 
type->sampler_dim); ++ vkd3d_unreachable(); ++ } ++ break; ++ ++ case HLSL_CLASS_ARRAY: ++ return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler); ++ ++ case HLSL_CLASS_STRUCT: ++ return D3DXPT_VOID; ++ ++ case HLSL_CLASS_STRING: ++ return D3DXPT_STRING; ++ ++ case HLSL_CLASS_PIXEL_SHADER: ++ return D3DXPT_PIXELSHADER; ++ ++ case HLSL_CLASS_VERTEX_SHADER: ++ return D3DXPT_VERTEXSHADER; ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_EFFECT_GROUP: ++ case HLSL_CLASS_ERROR: ++ case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RASTERIZER_STATE: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: ++ case HLSL_CLASS_NULL: ++ break; ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, ++ struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start) ++{ ++ const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); ++ unsigned int array_size = hlsl_get_multiarray_size(type); ++ struct hlsl_struct_field *field; ++ unsigned int field_count = 0; ++ size_t fields_offset = 0; ++ size_t i; ++ ++ if (type->bytecode_offset) + return; ++ ++ if (array_type->class == HLSL_CLASS_STRUCT) ++ { ++ field_count = array_type->e.record.field_count; ++ ++ for (i = 0; i < field_count; ++i) ++ { ++ field = &array_type->e.record.fields[i]; ++ field->name_bytecode_offset = put_string(buffer, field->name); ++ write_sm1_type(buffer, field->type, false, ctab_start); ++ } ++ ++ fields_offset = bytecode_align(buffer) - ctab_start; ++ ++ for (i = 0; i < field_count; ++i) ++ { ++ field = &array_type->e.record.fields[i]; ++ put_u32(buffer, field->name_bytecode_offset 
- ctab_start); ++ put_u32(buffer, field->type->bytecode_offset - ctab_start); ++ } ++ } ++ ++ type->bytecode_offset = put_u32(buffer, ++ vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); ++ put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); ++ put_u32(buffer, vkd3d_make_u32(array_size, field_count)); ++ put_u32(buffer, fields_offset); ++} ++ ++static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort) ++{ ++ struct hlsl_ir_var *var; ++ ++ list_remove(&to_sort->extern_entry); ++ ++ LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) ++ { ++ if (strcmp(to_sort->name, var->name) < 0) ++ { ++ list_add_before(&var->extern_entry, &to_sort->extern_entry); ++ return; ++ } ++ } ++ ++ list_add_tail(sorted, &to_sort->extern_entry); ++} ++ ++static void sm1_sort_externs(struct hlsl_ctx *ctx) ++{ ++ struct list sorted = LIST_INIT(sorted); ++ struct hlsl_ir_var *var, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_uniform) ++ sm1_sort_extern(&sorted, var); ++ } ++ list_move_tail(&ctx->extern_vars, &sorted); ++} ++ ++static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++{ ++ size_t ctab_start, vars_offset, vars_start, creator_offset, offset; ++ unsigned int uniform_count = 0, r; ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ for (r = 0; r <= HLSL_REGSET_LAST; ++r) ++ { ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) ++ continue; ++ ++ ++uniform_count; ++ ++ if (var->is_param && var->is_uniform) ++ { ++ char *new_name; ++ ++ if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name))) ++ return; ++ vkd3d_free((char *)var->name); ++ var->name = new_name; ++ } ++ } ++ } ++ ++ sm1_sort_externs(ctx); ++ ++ ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. 
*/ ++ creator_offset = put_u32(buffer, 0); ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ put_u32(buffer, D3DVS_VERSION(ctx->profile->major_version, ctx->profile->minor_version)); ++ else ++ put_u32(buffer, D3DPS_VERSION(ctx->profile->major_version, ctx->profile->minor_version)); ++ put_u32(buffer, uniform_count); ++ vars_offset = put_u32(buffer, 0); ++ put_u32(buffer, 0); /* FIXME: flags */ ++ put_u32(buffer, 0); /* FIXME: target string */ ++ ++ vars_start = bytecode_align(buffer); ++ set_u32(buffer, vars_offset, vars_start - ctab_start); ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ for (r = 0; r <= HLSL_REGSET_LAST; ++r) ++ { ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) ++ continue; ++ ++ put_u32(buffer, 0); /* name */ ++ if (r == HLSL_REGSET_NUMERIC) ++ { ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id)); ++ put_u32(buffer, var->bind_count[r]); ++ } ++ else ++ { ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); ++ put_u32(buffer, var->bind_count[r]); ++ } ++ put_u32(buffer, 0); /* type */ ++ put_u32(buffer, 0); /* default value */ ++ } ++ } ++ ++ uniform_count = 0; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ for (r = 0; r <= HLSL_REGSET_LAST; ++r) ++ { ++ size_t var_offset, name_offset; ++ ++ if (var->semantic.name || !var->regs[r].allocated || !var->last_read) ++ continue; ++ ++ var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t)); ++ ++ name_offset = put_string(buffer, var->name); ++ set_u32(buffer, var_offset, name_offset - ctab_start); ++ ++ write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start); ++ set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); ++ ++ if (var->default_values) ++ { ++ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int comp_count = 
hlsl_type_component_count(var->data_type); ++ unsigned int default_value_offset; ++ unsigned int k; ++ ++ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); ++ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); ++ ++ for (k = 0; k < comp_count; ++k) ++ { ++ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ unsigned int comp_offset; ++ enum hlsl_regset regset; ++ ++ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset); ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ union ++ { ++ uint32_t u; ++ float f; ++ } uni; ++ ++ switch (comp_type->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ if (ctx->double_as_float_alias) ++ uni.u = var->default_values[k].number.u; ++ else ++ uni.u = 0; ++ break; ++ ++ case HLSL_TYPE_INT: ++ uni.f = var->default_values[k].number.i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ uni.f = var->default_values[k].number.u; ++ break; ++ ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ uni.u = var->default_values[k].number.u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); ++ } ++ } ++ } ++ ++ ++uniform_count; ++ } + } + +- list_add_before(&instr->entry, &vsir_instr->entry); +- hlsl_replace_node(instr, vsir_instr); ++ offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL)); ++ set_u32(buffer, creator_offset, offset - ctab_start); ++} ++ ++static void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab) ++{ ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ ++ write_sm1_uniforms(ctx, &buffer); ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ return; ++ } ++ ctab->code = buffer.data; ++ ctab->size = buffer.size; + } + + static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, +@@ -7806,8 +8577,6 @@ 
static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + + if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) + ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); +- +- add_last_vsir_instr_to_block(ctx, program, block); + } + + static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, +@@ -7819,8 +8588,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ + return; + + ins->declaration.count = temp_count; +- +- add_last_vsir_instr_to_block(ctx, program, block); + } + + static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, +@@ -7838,8 +8605,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, + ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; + ins->declaration.indexable_temp.component_count = comp_count; + ins->declaration.indexable_temp.has_function_scope = false; +- +- add_last_vsir_instr_to_block(ctx, program, block); + } + + static bool type_is_float(const struct hlsl_type *type) +@@ -8505,59 +9270,690 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, + } + } + +-static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_store *store) + { +- struct vkd3d_string_buffer *dst_type_string; +- struct hlsl_ir_node *instr, *next; +- struct hlsl_ir_switch_case *c; ++ struct hlsl_ir_node *instr = &store->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; + +- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) +- { +- if (instr->data_type) +- { +- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) +- { +- 
hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); +- break; +- } +- } ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return false; + +- switch (instr->type) +- { +- case HLSL_IR_CALL: +- vkd3d_unreachable(); ++ dst_param = &ins->dst[0]; ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ dst_param, &store->lhs, &instr->loc, store->writemask)) ++ return false; + +- case HLSL_IR_CONSTANT: +- /* In SM4 all constants are inlined. */ +- break; ++ src_param = &ins->src[0]; ++ vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); + +- case HLSL_IR_EXPR: +- if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) +- break; ++ return true; ++} + +- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) +- replace_instr_with_last_vsir_instr(ctx, program, instr); ++/* Does this variable's data come directly from the API user, rather than ++ * being temporary or from a previous shader stage? I.e. is it a uniform or ++ * VS input? 
*/ ++static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) ++{ ++ if (var->is_uniform) ++ return true; + +- hlsl_release_string_buffer(ctx, dst_type_string); +- break; ++ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; ++} + +- case HLSL_IR_IF: +- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); +- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); +- break; ++static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_type *type = load->node.data_type; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ struct hlsl_constant_value value; + +- case HLSL_IR_LOOP: +- sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); ++ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) ++ { ++ /* Uniform bools can be specified as anything, but internal bools ++ * always have 0 for false and ~0 for true. Normalise that here. 
*/ ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) ++ return false; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) ++ return false; ++ ++ memset(&value, 0xff, sizeof(value)); ++ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, ++ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ memset(&value, 0x00, sizeof(value)); ++ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, ++ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ } ++ else ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return false; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) ++ return false; ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_resource_store *store) ++{ ++ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); ++ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; ++ struct hlsl_ir_node *instr = &store->node; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int writemask; ++ ++ if (!store->resource.var->is_uniform) ++ { ++ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); ++ return false; ++ } ++ ++ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ { ++ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); ++ return false; ++ } ++ ++ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, 
&instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) ++ return false; ++ ++ writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ &ins->dst[0], &store->resource, &instr->loc, writemask)) ++ return false; ++ } ++ else ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) ++ return false; ++ ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) ++ return false; ++ } ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ ++ return true; ++} ++ ++static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset) ++{ ++ struct hlsl_ir_constant *offset; ++ ++ VKD3D_ASSERT(texel_offset); ++ if (texel_offset->type != HLSL_IR_CONSTANT) ++ return false; ++ offset = hlsl_ir_constant(texel_offset); ++ ++ if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) ++ return false; ++ if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) ++ return false; ++ if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) ++ return false; ++ return true; ++} ++ ++static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi( ++ struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset) ++{ ++ struct hlsl_ir_constant *offset; ++ ++ if (!texel_offset) ++ return; ++ offset = hlsl_ir_constant(texel_offset); ++ ++ ins->texel_offset.u = offset->value.u[0].i; ++ ins->texel_offset.v = 0; ++ ins->texel_offset.w = 0; ++ if (offset->node.data_type->dimx > 1) ++ ins->texel_offset.v = offset->value.u[1].i; ++ if (offset->node.data_type->dimx > 2) ++ ins->texel_offset.w = offset->value.u[2].i; ++} ++ ++static bool 
sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); ++ bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; ++ const struct hlsl_ir_node *sample_index = load->sample_index.node; ++ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; ++ const struct hlsl_ir_node *coords = load->coords.node; ++ unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *instr = &load->node; ++ enum hlsl_sampler_dim dim = load->sampling_dim; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ bool multisampled; ++ ++ VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); ++ ++ multisampled = resource_type->class == HLSL_CLASS_TEXTURE ++ && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); ++ ++ if (uav) ++ opcode = VKD3DSIH_LD_UAV_TYPED; ++ else if (raw) ++ opcode = VKD3DSIH_LD_RAW; ++ else ++ opcode = multisampled ? 
VKD3DSIH_LD2DMS : VKD3DSIH_LD; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) ++ return false; ++ ++ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) ++ { ++ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7."); ++ return false; ++ } ++ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ if (!uav) ++ { ++ /* Mipmap level is in the last component in the IR, but needs to be in ++ * the W component in the instruction. */ ++ unsigned int dim_count = hlsl_sampler_dim_count(dim); ++ ++ if (dim_count == 1) ++ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; ++ if (dim_count == 2) ++ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; ++ } ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (multisampled) ++ { ++ if (sample_index->type == HLSL_IR_CONSTANT) ++ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, ++ &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0); ++ else if (version->major == 4 && version->minor == 0) ++ hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); ++ else ++ vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; ++ const struct hlsl_ir_node *coords = load->coords.node; ++ const struct hlsl_deref *resource = 
&load->resource; ++ const struct hlsl_deref *sampler = &load->sampler; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ unsigned int src_count; ++ ++ switch (load->load_type) ++ { ++ case HLSL_RESOURCE_SAMPLE: ++ opcode = VKD3DSIH_SAMPLE; ++ src_count = 3; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_CMP: ++ opcode = VKD3DSIH_SAMPLE_C; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_CMP_LZ: ++ opcode = VKD3DSIH_SAMPLE_C_LZ; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_LOD: ++ opcode = VKD3DSIH_SAMPLE_LOD; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ opcode = VKD3DSIH_SAMPLE_B; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ opcode = VKD3DSIH_SAMPLE_GRAD; ++ src_count = 5; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) ++ return false; ++ ++ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) ++ { ++ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7."); ++ return false; ++ } ++ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], ++ resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2], ++ sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ++ if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B) ++ { ++ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ else if (opcode == 
VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ) ++ { ++ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ else if (opcode == VKD3DSIH_SAMPLE_GRAD) ++ { ++ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, ++ const struct hlsl_ir_resource_load *load, uint32_t swizzle) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; ++ const struct hlsl_ir_node *coords = load->coords.node; ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_deref *sampler = &load->sampler; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ ++ opcode = VKD3DSIH_GATHER4; ++ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) ++ { ++ if (!vkd3d_shader_ver_ge(version, 5, 0)) ++ { ++ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); ++ return false; ++ } ++ opcode = VKD3DSIH_GATHER4_PO; ++ } ++ ++ if (opcode == VKD3DSIH_GATHER4) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) ++ return false; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ 
&ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[2].swizzle = swizzle; ++ } ++ else if (opcode == VKD3DSIH_GATHER4_PO) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) ++ return false; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ins->src[3].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[3].swizzle = swizzle; ++ } ++ else ++ { ++ vkd3d_unreachable(); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct hlsl_type *type = instr->data_type; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) ++ return false; ++ ++ if (type->e.numeric.type == HLSL_TYPE_UINT) ++ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, ++ struct vsir_program 
*program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct hlsl_type *type = instr->data_type; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER ++ || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ { ++ hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers."); ++ return false; ++ } ++ ++ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2))) ++ return false; ++ ++ if (type->e.numeric.type == HLSL_TYPE_UINT) ++ ins->flags = VKD3DSI_RESINFO_UINT; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ if (load->sampler.var && !load->sampler.var->is_uniform) ++ { ++ hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); ++ return false; ++ } ++ ++ if (!load->resource.var->is_uniform) ++ { ++ hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); ++ return false; ++ } ++ ++ switch (load->load_type) ++ { ++ case HLSL_RESOURCE_LOAD: ++ return sm4_generate_vsir_instr_ld(ctx, program, load); ++ ++ case HLSL_RESOURCE_SAMPLE: ++ case HLSL_RESOURCE_SAMPLE_CMP: ++ case HLSL_RESOURCE_SAMPLE_CMP_LZ: ++ case HLSL_RESOURCE_SAMPLE_LOD: ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ /* Combined sample expressions were lowered. 
*/ ++ VKD3D_ASSERT(load->sampler.var); ++ return sm4_generate_vsir_instr_sample(ctx, program, load); ++ ++ case HLSL_RESOURCE_GATHER_RED: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); ++ ++ case HLSL_RESOURCE_GATHER_GREEN: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); ++ ++ case HLSL_RESOURCE_GATHER_BLUE: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); ++ ++ case HLSL_RESOURCE_GATHER_ALPHA: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); ++ ++ case HLSL_RESOURCE_SAMPLE_INFO: ++ return sm4_generate_vsir_instr_sample_info(ctx, program, load); ++ ++ case HLSL_RESOURCE_RESINFO: ++ return sm4_generate_vsir_instr_resinfo(ctx, program, load); ++ ++ case HLSL_RESOURCE_SAMPLE_PROJ: ++ vkd3d_unreachable(); ++ ++ default: ++ return false; ++ } ++} ++ ++static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_jump *jump) ++{ ++ const struct hlsl_ir_node *instr = &jump->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ switch (jump->type) ++ { ++ case HLSL_IR_JUMP_BREAK: ++ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0); ++ ++ case HLSL_IR_JUMP_CONTINUE: ++ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0); ++ ++ case HLSL_IR_JUMP_DISCARD_NZ: ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1))) ++ return false; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL); ++ return true; ++ ++ case HLSL_IR_JUMP_RETURN: ++ vkd3d_unreachable(); ++ ++ default: ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ return false; ++ } ++} ++ ++static void 
sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); ++ ++static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) ++{ ++ struct hlsl_ir_node *instr = &iff->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) ++ return; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL); ++ ++ sm4_generate_vsir_block(ctx, &iff->then_block, program); ++ ++ if (!list_empty(&iff->else_block.instrs)) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) ++ return; ++ sm4_generate_vsir_block(ctx, &iff->else_block, program); ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) ++ return; ++} ++ ++static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_loop *loop) ++{ ++ struct hlsl_ir_node *instr = &loop->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0))) ++ return; ++ ++ sm4_generate_vsir_block(ctx, &loop->body, program); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0))) ++ return; ++} ++ ++static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_switch *swi) ++{ ++ const struct hlsl_ir_node *selector = swi->selector.node; ++ struct hlsl_ir_node *instr = &swi->node; ++ struct vkd3d_shader_instruction *ins; ++ struct hlsl_ir_switch_case *cas; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1))) ++ 
return; ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL); ++ ++ LIST_FOR_EACH_ENTRY(cas, &swi->cases, struct hlsl_ir_switch_case, entry) ++ { ++ if (cas->is_default) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0))) ++ return; ++ } ++ else ++ { ++ struct hlsl_constant_value value = {.u[0].u = cas->value}; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1))) ++ return; ++ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL); ++ } ++ ++ sm4_generate_vsir_block(ctx, &cas->body, program); ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0))) ++ return; ++} ++ ++static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++{ ++ struct vkd3d_string_buffer *dst_type_string; ++ struct hlsl_ir_node *instr, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->data_type) ++ { ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); ++ break; ++ } ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ /* In SM4 all constants are inlined. 
*/ ++ break; ++ ++ case HLSL_IR_EXPR: ++ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) ++ break; ++ sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer); ++ hlsl_release_string_buffer(ctx, dst_type_string); ++ break; ++ ++ case HLSL_IR_IF: ++ sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); ++ break; ++ ++ case HLSL_IR_LOAD: ++ sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); ++ break; ++ ++ case HLSL_IR_LOOP: ++ sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); ++ break; ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); ++ break; ++ ++ case HLSL_IR_RESOURCE_STORE: ++ sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)); ++ break; ++ ++ case HLSL_IR_JUMP: ++ sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); ++ break; ++ ++ case HLSL_IR_STORE: ++ sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); + break; + + case HLSL_IR_SWITCH: +- LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) +- sm4_generate_vsir_block(ctx, &c->body, program); ++ sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); + break; + + case HLSL_IR_SWIZZLE: + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); +- replace_instr_with_last_vsir_instr(ctx, program, instr); + break; + + default: +@@ -8582,42 +9978,330 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + return; + program->temp_count = max(program->temp_count, temp_count); + +- hlsl_block_init(&block); ++ hlsl_block_init(&block); ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if ((var->is_input_semantic && var->last_read) ++ || (var->is_output_semantic && var->first_write)) ++ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ } 
++ ++ if (temp_count) ++ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) ++ continue; ++ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) ++ continue; ++ ++ if (var->indexable) ++ { ++ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; ++ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; ++ ++ sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); ++ } ++ } ++ } ++ ++ list_move_head(&func->body.instrs, &block.instrs); ++ ++ hlsl_block_cleanup(&block); ++ ++ sm4_generate_vsir_block(ctx, &func->body, program); ++ ++ generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); ++} ++ ++static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program) ++{ ++ struct extern_resource *extern_resources; ++ unsigned int extern_resources_count; ++ ++ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); ++ for (unsigned int i = 0; i < extern_resources_count; ++i) ++ { ++ if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) ++ program->features.rovs = true; ++ } ++ sm4_free_extern_resources(extern_resources, extern_resources_count); ++ ++ /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, ++ * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ ++} ++ ++static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_function_decl *entry_func) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ struct extern_resource *extern_resources; ++ unsigned int extern_resources_count, i; ++ ++ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); ++ ++ if (version->major == 4) ++ { ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ const struct extern_resource *resource = &extern_resources[i]; ++ const struct hlsl_type *type = resource->component_type; ++ ++ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ program->global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; ++ break; ++ } ++ } ++ } ++ ++ sm4_free_extern_resources(extern_resources, extern_resources_count); ++ ++ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) ++ program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; ++} ++ ++static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_buffer *cbuffer) ++{ ++ unsigned int array_first = cbuffer->reg.index; ++ unsigned int array_last = cbuffer->reg.index; /* FIXME: array end. 
*/ ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &cbuffer->loc, VKD3DSIH_DCL_CONSTANT_BUFFER, 0, 0))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ ins->declaration.cb.size = cbuffer->size; ++ ++ src_param = &ins->declaration.cb.src; ++ vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 0); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ ins->declaration.cb.range.space = cbuffer->reg.space; ++ ins->declaration.cb.range.first = array_first; ++ ins->declaration.cb.range.last = array_last; ++ ++ src_param->reg.idx[0].offset = cbuffer->reg.id; ++ src_param->reg.idx[1].offset = array_first; ++ src_param->reg.idx[2].offset = array_last; ++ src_param->reg.idx_count = 3; ++} ++ ++static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct extern_resource *resource) ++{ ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int i; ++ ++ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); ++ VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); ++ ++ for (i = 0; i < resource->bind_count; ++i) ++ { ++ unsigned int array_first = resource->index + i; ++ unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ ++ ++ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) ++ continue; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, VKD3DSIH_DCL_SAMPLER, 0, 0))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) ++ ins->flags |= VKD3DSI_SAMPLER_COMPARISON_MODE; ++ ++ src_param = &ins->declaration.sampler.src; ++ vsir_src_param_init(src_param, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 0); ++ ++ ins->declaration.sampler.range.first = array_first; ++ ins->declaration.sampler.range.last = array_last; ++ ins->declaration.sampler.range.space = resource->space; ++ ++ src_param->reg.idx[0].offset = resource->id; ++ src_param->reg.idx[1].offset = array_first; ++ src_param->reg.idx[2].offset = array_last; ++ src_param->reg.idx_count = 3; ++ } ++} ++ ++static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type) ++{ ++ switch (type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_1D: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_1D; ++ case HLSL_SAMPLER_DIM_2D: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ case HLSL_SAMPLER_DIM_3D: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_3D; ++ case HLSL_SAMPLER_DIM_CUBE: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; ++ case HLSL_SAMPLER_DIM_1DARRAY: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY; ++ case HLSL_SAMPLER_DIM_2DARRAY: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; ++ case HLSL_SAMPLER_DIM_2DMS: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_2DMS; ++ case HLSL_SAMPLER_DIM_2DMSARRAY: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY; ++ case HLSL_SAMPLER_DIM_CUBEARRAY: ++ return VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY; ++ case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ return VKD3D_SHADER_RESOURCE_BUFFER; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static enum vkd3d_data_type 
sm4_generate_vsir_get_format_type(const struct hlsl_type *type) ++{ ++ const struct hlsl_type *format = type->e.resource.format; ++ ++ switch (format->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ return VKD3D_DATA_DOUBLE; ++ ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_HALF: ++ if (format->modifiers & HLSL_MODIFIER_UNORM) ++ return VKD3D_DATA_UNORM; ++ if (format->modifiers & HLSL_MODIFIER_SNORM) ++ return VKD3D_DATA_SNORM; ++ return VKD3D_DATA_FLOAT; ++ ++ case HLSL_TYPE_INT: ++ return VKD3D_DATA_INT; ++ break; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ return VKD3D_DATA_UINT; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct extern_resource *resource, ++ bool uav) ++{ ++ enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; ++ struct vkd3d_shader_structured_resource *structured_resource; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_semantic *semantic; ++ struct vkd3d_shader_instruction *ins; ++ struct hlsl_type *component_type; ++ enum vkd3d_shader_opcode opcode; ++ bool multisampled; ++ unsigned int i, j; ++ ++ VKD3D_ASSERT(resource->regset == regset); ++ VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); + +- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ component_type = resource->component_type; ++ ++ for (i = 0; i < resource->bind_count; ++i) + { +- if ((var->is_input_semantic && var->last_read) +- || (var->is_output_semantic && var->first_write)) +- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); +- } ++ unsigned int array_first = resource->index + i; ++ unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ + +- if (temp_count) +- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ if (resource->var && !resource->var->objects_usage[regset][i].used) ++ continue; + +- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) +- { +- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ if (uav) + { +- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) +- continue; +- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) +- continue; +- +- if (var->indexable) ++ switch (component_type->sampler_dim) + { +- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; +- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; +- +- sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); ++ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ opcode = VKD3DSIH_DCL_UAV_STRUCTURED; ++ break; ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ opcode = VKD3DSIH_DCL_UAV_RAW; ++ break; ++ default: ++ opcode = VKD3DSIH_DCL_UAV_TYPED; ++ break; ++ } ++ } ++ else ++ { ++ switch (component_type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ opcode = VKD3DSIH_DCL_RESOURCE_RAW; ++ break; ++ default: ++ opcode = VKD3DSIH_DCL; ++ break; + } + } +- } + +- list_move_head(&func->body.instrs, &block.instrs); ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ semantic = &ins->declaration.semantic; ++ structured_resource = &ins->declaration.structured_resource; ++ dst_param = &semantic->resource.reg; ++ vsir_dst_param_init(dst_param, uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0); + +- hlsl_block_cleanup(&block); ++ if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; ++ if (uav && component_type->e.resource.rasteriser_ordered) ++ ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW; + +- sm4_generate_vsir_block(ctx, &func->body, program); ++ multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; ++ ++ if (!hlsl_version_ge(ctx, 4, 1) && multisampled && !component_type->sample_count) ++ { ++ hlsl_error(ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Multisampled texture object declaration needs sample count for profile %u.%u.", ++ ctx->profile->major_version, ctx->profile->minor_version); ++ } ++ ++ for (j = 0; j < 4; ++j) ++ semantic->resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type); ++ ++ semantic->resource.range.first = array_first; ++ semantic->resource.range.last = array_last; ++ semantic->resource.range.space = resource->space; ++ ++ dst_param->reg.idx[0].offset = resource->id; ++ dst_param->reg.idx[1].offset = array_first; ++ dst_param->reg.idx[2].offset = array_last; ++ dst_param->reg.idx_count = 3; ++ ++ ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type); ++ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ ins->raw = true; ++ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ { ++ ins->structured = true; ++ ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; ++ } ++ ++ if (multisampled) ++ semantic->sample_count = component_type->sample_count; ++ } + } + + /* OBJECTIVE: Translate all the information from ctx and entry_func to the +@@ -8627,12 +10311,15 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, 
struct hlsl_ir_function_decl + uint64_t config_flags, struct vsir_program *program) + { + struct vkd3d_shader_version version = {0}; ++ struct extern_resource *extern_resources; ++ unsigned int extern_resources_count; ++ const struct hlsl_buffer *cbuffer; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + +- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; +@@ -8648,45 +10335,178 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + program->thread_group_size.y = ctx->thread_count[1]; + program->thread_group_size.z = ctx->thread_count[2]; + } ++ else if (version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ program->input_control_point_count = 1; /* TODO: Obtain from InputPatch */ ++ program->output_control_point_count = ctx->output_control_point_count; ++ program->tess_domain = ctx->domain; ++ program->tess_partitioning = ctx->partitioning; ++ program->tess_output_primitive = ctx->output_primitive; ++ } ++ else if (version.type == VKD3D_SHADER_TYPE_DOMAIN) ++ { ++ program->input_control_point_count = 0; /* TODO: Obtain from OutputPatch */ ++ program->tess_domain = ctx->domain; ++ } ++ ++ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (cbuffer->reg.allocated) ++ sm4_generate_vsir_add_dcl_constant_buffer(ctx, program, cbuffer); ++ } ++ ++ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); ++ for (unsigned int i = 0; i < extern_resources_count; ++i) ++ { ++ const struct extern_resource *resource = &extern_resources[i]; ++ ++ if (resource->regset == HLSL_REGSET_SAMPLERS) ++ sm4_generate_vsir_add_dcl_sampler(ctx, program, resource); ++ else if (resource->regset == HLSL_REGSET_TEXTURES) ++ 
sm4_generate_vsir_add_dcl_texture(ctx, program, resource, false); ++ else if (resource->regset == HLSL_REGSET_UAVS) ++ sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true); ++ } ++ sm4_free_extern_resources(extern_resources, extern_resources_count); + ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ generate_vsir_add_program_instruction(ctx, program, ++ &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); + sm4_generate_vsir_add_function(ctx, func, config_flags, program); + if (version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ generate_vsir_add_program_instruction(ctx, program, ++ &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); + sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); ++ } ++ ++ generate_vsir_scan_required_features(ctx, program); ++ generate_vsir_scan_global_flags(ctx, program, func); + } + +-static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, +- struct hlsl_block **found_block) ++static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, ++ bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *node; ++ struct hlsl_ir_node *const_node, *store; + +- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ if (!(const_node = hlsl_new_bool_constant(ctx, val, loc))) ++ return false; ++ hlsl_block_add_instr(block, const_node); ++ ++ if (!(store = hlsl_new_simple_store(ctx, var, const_node))) ++ return false; ++ hlsl_block_add_instr(block, store); ++ ++ return true; ++} ++ ++static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued); ++ ++static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, ++ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) ++{ ++ struct 
hlsl_ir_jump *jump; ++ struct hlsl_ir_var *var; ++ struct hlsl_block draft; ++ struct hlsl_ir_if *iff; ++ ++ if (node->type == HLSL_IR_IF) + { +- if (node == stop_point) +- return NULL; ++ iff = hlsl_ir_if(node); ++ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued)) ++ return true; ++ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued)) ++ return true; ++ return false; ++ } + +- if (node->type == HLSL_IR_IF) +- { +- struct hlsl_ir_if *iff = hlsl_ir_if(node); +- struct hlsl_ir_jump *jump = NULL; ++ if (node->type == HLSL_IR_JUMP) ++ { ++ jump = hlsl_ir_jump(node); ++ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK) ++ return false; + +- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) +- return jump; +- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) +- return jump; +- } +- else if (node->type == HLSL_IR_JUMP) +- { +- struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ hlsl_block_init(&draft); + +- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) +- { +- *found_block = block; +- return jump; +- } +- } ++ if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE) ++ var = loop_continued; ++ else ++ var = loop_broken; ++ ++ if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc)) ++ return false; ++ ++ list_move_before(&jump->node.entry, &draft.instrs); ++ list_remove(&jump->node.entry); ++ hlsl_free_instr(&jump->node); ++ ++ return true; + } + +- return NULL; ++ return false; ++} ++ ++static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, ++ struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *cond, *iff; ++ struct hlsl_block then_block; ++ struct hlsl_ir_load *load; ++ ++ hlsl_block_init(&then_block); ++ ++ if (!(load = hlsl_new_var_load(ctx, var, 
loc))) ++ return NULL; ++ hlsl_block_add_instr(dst, &load->node); ++ ++ if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc))) ++ return NULL; ++ hlsl_block_add_instr(dst, cond); ++ ++ if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc))) ++ return NULL; ++ hlsl_block_add_instr(dst, iff); ++ ++ return hlsl_ir_if(iff); ++} ++ ++static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) ++{ ++ struct hlsl_ir_node *node, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ struct hlsl_ir_if *broken_check, *continued_check; ++ struct hlsl_block draft; ++ ++ if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued)) ++ continue; ++ ++ if (&next->entry == &block->instrs) ++ return true; ++ ++ hlsl_block_init(&draft); ++ ++ broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc); ++ continued_check = loop_unrolling_generate_var_check(ctx, ++ &broken_check->then_block, loop_continued, &next->loc); ++ ++ list_move_before(&next->entry, &draft.instrs); ++ ++ list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs)); ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) ++{ ++ while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued)); + } + + static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) +@@ -8696,7 +10516,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru + return loop->unroll_limit; + + /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. 
*/ +- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) + return 1024; + + /* SM4 limits implicit unrolling to 254 iterations. */ +@@ -8707,167 +10527,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru + return 1024; + } + +-static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, +- struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) ++static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct copy_propagation_state *state, unsigned int *index) ++{ ++ size_t scopes_depth = state->scope_count - 1; ++ unsigned int current_index; ++ bool progress; ++ ++ do ++ { ++ state->stopped = false; ++ for (size_t i = state->scope_count; scopes_depth < i; --i) ++ copy_propagation_pop_scope(state); ++ copy_propagation_push_scope(state, ctx); ++ ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); ++ ++ current_index = index_instructions(block, *index); ++ progress |= copy_propagation_transform_block(ctx, block, state); ++ ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL); ++ } while (progress); ++ ++ *index = current_index; ++} ++ ++static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var) ++{ ++ struct copy_propagation_value *v; ++ ++ if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX)) ++ || v->node->type != HLSL_IR_CONSTANT) ++ return false; ++ ++ return hlsl_ir_constant(v->node)->value.u[0].u; ++} ++ ++static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct 
hlsl_ir_loop *loop) + { +- unsigned int max_iterations, i; ++ struct hlsl_block draft, tmp_dst, loop_body; ++ struct hlsl_ir_var *broken, *continued; ++ unsigned int max_iterations, i, index; ++ struct copy_propagation_state state; ++ struct hlsl_ir_if *target_if; ++ ++ if (!(broken = hlsl_new_synthetic_var(ctx, "broken", ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) ++ goto fail; ++ ++ if (!(continued = hlsl_new_synthetic_var(ctx, "continued", ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) ++ goto fail; ++ ++ hlsl_block_init(&draft); ++ hlsl_block_init(&tmp_dst); + + max_iterations = loop_unrolling_get_max_iterations(ctx, loop); ++ copy_propagation_state_init(&state, ctx); ++ index = 2; ++ state.stop = &loop->node; ++ loop_unrolling_simplify(ctx, block, &state, &index); ++ state.stopped = false; ++ index = loop->node.index; ++ ++ if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc)) ++ goto fail; ++ hlsl_block_add_block(&draft, &tmp_dst); ++ ++ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) ++ goto fail; ++ hlsl_block_add_block(&draft, &tmp_dst); ++ ++ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) ++ goto fail; ++ state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry); ++ hlsl_block_add_block(&draft, &tmp_dst); ++ ++ copy_propagation_push_scope(&state, ctx); ++ loop_unrolling_simplify(ctx, &draft, &state, &index); ++ ++ /* As an optimization, we only remove jumps from the loop's body once. 
*/ ++ if (!hlsl_clone_block(ctx, &loop_body, &loop->body)) ++ goto fail; ++ loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued); + + for (i = 0; i < max_iterations; ++i) + { +- struct hlsl_block tmp_dst, *jump_block; +- struct hlsl_ir_jump *jump = NULL; ++ copy_propagation_push_scope(&state, ctx); + +- if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) +- return false; +- list_move_before(&loop->node.entry, &tmp_dst.instrs); +- hlsl_block_cleanup(&tmp_dst); ++ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) ++ goto fail; ++ hlsl_block_add_block(&target_if->then_block, &tmp_dst); + +- hlsl_run_const_passes(ctx, block); ++ if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body)) ++ goto fail; ++ hlsl_block_add_block(&target_if->then_block, &tmp_dst); + +- if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) +- { +- enum hlsl_ir_jump_type type = jump->type; ++ loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index); + +- if (jump_block != loop_parent) +- { +- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) +- hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, +- "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); +- return false; +- } ++ if (loop_unrolling_check_val(&state, broken)) ++ break; + +- list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); +- hlsl_block_cleanup(&tmp_dst); ++ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) ++ goto fail; ++ hlsl_block_add_block(&draft, &tmp_dst); + +- if (type == HLSL_IR_JUMP_BREAK) +- break; +- } +- } ++ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter)) ++ goto fail; ++ hlsl_block_add_block(&target_if->then_block, &tmp_dst); ++ } + + /* Native will not emit an error if max_iterations has been reached with an + * explicit limit. 
It also will not insert a loop if there are iterations left + * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ + if (!loop->unroll_limit && i == max_iterations) + { +- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, + "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); +- return false; ++ goto fail; + } + ++ hlsl_block_cleanup(&loop_body); ++ copy_propagation_state_destroy(&state); ++ ++ list_move_before(&loop->node.entry, &draft.instrs); ++ hlsl_block_cleanup(&draft); + list_remove(&loop->node.entry); + hlsl_free_instr(&loop->node); + + return true; ++ ++fail: ++ hlsl_block_cleanup(&loop_body); ++ copy_propagation_state_destroy(&state); ++ hlsl_block_cleanup(&draft); ++ ++ return false; + } + +-/* +- * loop_unrolling_find_unrollable_loop() is not the normal way to do things; +- * normal passes simply iterate over the whole block and apply a transformation +- * to every relevant instruction. However, loop unrolling can fail, and we want +- * to leave the loop in its previous state in that case. That isn't a problem by +- * itself, except that loop unrolling needs copy-prop in order to work properly, +- * and copy-prop state at the time of the loop depends on the rest of the program +- * up to that point. This means we need to clone the whole program, and at that +- * point we have to search it again anyway to find the clone of the loop we were +- * going to unroll. +- * +- * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop +- * up until the loop instruction, clone just that loop, then use copyprop again +- * with the saved state after unrolling. However, copyprop currently isn't built +- * for that yet [notably, it still relies on indices]. 
Note also this still doesn't +- * really let us use transform_ir() anyway [since we don't have a good way to say +- * "copyprop from the beginning of the program up to the instruction we're +- * currently processing" from the callback]; we'd have to use a dedicated +- * recursive function instead. */ +-static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, +- struct hlsl_block **containing_block) ++static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) + { +- struct hlsl_ir_node *instr; ++ struct hlsl_block *program = context; ++ struct hlsl_ir_loop *loop; + +- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ if (node->type != HLSL_IR_LOOP) ++ return true; ++ ++ loop = hlsl_ir_loop(node); ++ ++ if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL) ++ return true; ++ ++ if (!loop_unrolling_unroll_loop(ctx, program, loop)) ++ loop->unroll_type = HLSL_LOOP_FORCE_LOOP; ++ ++ return true; ++} ++ ++/* We could handle this at parse time. However, loop unrolling often needs to ++ * know the value of variables modified in the "iter" block. It is possible to ++ * detect that all exit paths of a loop body modify such variables in the same ++ * way, but difficult, and d3dcompiler does not attempt to do so. 
++ * In fact, d3dcompiler is capable of unrolling the following loop: ++ * for (int i = 0; i < 10; ++i) ++ * { ++ * if (some_uniform > 4) ++ * continue; ++ * } ++ * but cannot unroll the same loop with "++i" moved to each exit path: ++ * for (int i = 0; i < 10;) ++ * { ++ * if (some_uniform > 4) ++ * { ++ * ++i; ++ * continue; ++ * } ++ * ++i; ++ * } ++ */ ++static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) ++{ ++ struct hlsl_ir_loop *loop; ++ ++ if (node->type != HLSL_IR_LOOP) ++ return true; ++ ++ loop = hlsl_ir_loop(node); ++ ++ hlsl_block_add_block(&loop->body, &loop->iter); ++ return true; ++} ++ ++static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop) ++{ ++ struct hlsl_ir_node *node; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { +- switch (instr->type) ++ switch (node->type) + { + case HLSL_IR_LOOP: + { +- struct hlsl_ir_loop *nested_loop; +- struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); +- +- if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) +- return nested_loop; +- +- if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) +- { +- *containing_block = block; +- return loop; +- } ++ struct hlsl_ir_loop *loop = hlsl_ir_loop(node); + ++ resolve_continues(ctx, &loop->body, loop); + break; + } + case HLSL_IR_IF: + { +- struct hlsl_ir_loop *loop; +- struct hlsl_ir_if *iff = hlsl_ir_if(instr); +- +- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) +- return loop; +- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) +- return loop; +- ++ struct hlsl_ir_if *iff = hlsl_ir_if(node); ++ resolve_continues(ctx, &iff->then_block, last_loop); ++ resolve_continues(ctx, &iff->else_block, last_loop); + break; + } + case HLSL_IR_SWITCH: + { +- struct hlsl_ir_switch *s = 
hlsl_ir_switch(instr); ++ struct hlsl_ir_switch *s = hlsl_ir_switch(node); + struct hlsl_ir_switch_case *c; +- struct hlsl_ir_loop *loop; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { +- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) +- return loop; ++ resolve_continues(ctx, &c->body, last_loop); ++ } ++ ++ break; ++ } ++ case HLSL_IR_JUMP: ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ ++ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) ++ break; ++ ++ if (last_loop->type == HLSL_LOOP_FOR) ++ { ++ struct hlsl_block draft; ++ ++ if (!hlsl_clone_block(ctx, &draft, &last_loop->iter)) ++ return; ++ ++ list_move_before(&node->entry, &draft.instrs); ++ hlsl_block_cleanup(&draft); + } + ++ jump->type = HLSL_IR_JUMP_CONTINUE; + break; + } + default: + break; + } + } +- +- return NULL; + } + +-static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) ++static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) + { +- while (true) +- { +- struct hlsl_block clone, *containing_block; +- struct hlsl_ir_loop *loop, *cloned_loop; +- +- if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) +- return; +- +- if (!hlsl_clone_block(ctx, &clone, block)) +- return; +- +- cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); +- VKD3D_ASSERT(cloned_loop); ++ bool progress; + +- if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) +- { +- hlsl_block_cleanup(&clone); +- loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; +- continue; +- } ++ /* These are required by copy propagation, which in turn is required for ++ * unrolling. 
*/ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL); ++ progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL); ++ } while (progress); ++ hlsl_transform_ir(ctx, split_matrix_copies, block, NULL); + +- hlsl_block_cleanup(block); +- hlsl_block_init(block); +- hlsl_block_add_block(block, &clone); +- } ++ hlsl_transform_ir(ctx, unroll_loops, block, block); ++ resolve_continues(ctx, block, NULL); ++ hlsl_transform_ir(ctx, resolve_loops, block, NULL); + } + + static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +@@ -9116,7 +11048,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, + hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); + } + +- transform_unroll_loops(ctx, body); ++ loop_unrolling_execute(ctx, body); + hlsl_run_const_passes(ctx, body); + + remove_unreachable_code(ctx, body); +@@ -9126,9 +11058,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + ++ if (hlsl_version_lt(ctx, 4, 0)) ++ hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); ++ + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); +- if (profile->major_version >= 4) ++ ++ if (hlsl_version_ge(ctx, 4, 0)) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); + + do +@@ -9136,7 +11072,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, + while (hlsl_transform_ir(ctx, dce, body, NULL)); + + hlsl_transform_ir(ctx, track_components_usage, body, NULL); +- sort_synthetic_separated_samplers_first(ctx); ++ if (hlsl_version_lt(ctx, 4, 0)) ++ sort_synthetic_combined_samplers_first(ctx); ++ else ++ sort_synthetic_separated_samplers_first(ctx); + + if (profile->major_version < 4) + { +@@ -9241,14 +11180,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + 
mark_indexable_vars(ctx, entry_func); + allocate_temp_registers(ctx, entry_func); + allocate_const_registers(ctx, entry_func); ++ sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); + } + else + { + allocate_buffers(ctx); + allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); + allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); ++ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); + } +- allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); + + if (TRACE_ON()) + rb_for_each_entry(&ctx->functions, dump_function, ctx); +@@ -9265,7 +11206,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + struct vsir_program program; + int result; + +- sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); ++ sm1_generate_ctab(ctx, &ctab); ++ if (ctx->result) ++ return ctx->result; ++ ++ sm1_generate_vsir(ctx, entry_func, config_flags, &program); + if (ctx->result) + { + vsir_program_cleanup(&program); +@@ -9282,18 +11227,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + case VKD3D_SHADER_TARGET_DXBC_TPF: + { + uint32_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vkd3d_shader_code rdef = {0}; + struct vsir_program program; + int result; + ++ sm4_generate_rdef(ctx, &rdef); ++ if (ctx->result) ++ return ctx->result; ++ + sm4_generate_vsir(ctx, entry_func, config_flags, &program); + if (ctx->result) + { + vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&rdef); + return ctx->result; + } + +- result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); ++ result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context); + vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&rdef); + return result; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 
716adb15f08..cd7cd2fe6a3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -220,7 +220,9 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + break; + + case HLSL_TYPE_BOOL: +- /* Casts to bool should have already been lowered. */ ++ dst->u[k].u = u ? ~0u : 0u; ++ break; ++ + default: + vkd3d_unreachable(); + } +@@ -1544,6 +1546,149 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + return false; + } + ++static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type) ++{ ++ switch (op) ++ { ++ case HLSL_OP2_ADD: ++ case HLSL_OP2_MUL: ++ return type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT; ++ ++ case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_BIT_XOR: ++ case HLSL_OP2_LOGIC_AND: ++ case HLSL_OP2_LOGIC_OR: ++ case HLSL_OP2_MAX: ++ case HLSL_OP2_MIN: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static bool is_op_commutative(enum hlsl_ir_expr_op op) ++{ ++ switch (op) ++ { ++ case HLSL_OP2_ADD: ++ case HLSL_OP2_BIT_AND: ++ case HLSL_OP2_BIT_OR: ++ case HLSL_OP2_BIT_XOR: ++ case HLSL_OP2_DOT: ++ case HLSL_OP2_LOGIC_AND: ++ case HLSL_OP2_LOGIC_OR: ++ case HLSL_OP2_MAX: ++ case HLSL_OP2_MIN: ++ case HLSL_OP2_MUL: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *arg1 , *arg2; ++ struct hlsl_ir_expr *expr; ++ enum hlsl_base_type type; ++ enum hlsl_ir_expr_op op; ++ bool progress = false; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ ++ if (instr->data_type->class > HLSL_CLASS_VECTOR) ++ return false; ++ ++ arg1 = expr->operands[0].node; ++ arg2 = expr->operands[1].node; ++ type = instr->data_type->e.numeric.type; ++ op = expr->op; ++ ++ if (!arg1 || !arg2) ++ return false; ++ ++ if (is_op_commutative(op) && 
arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) ++ { ++ /* a OP x -> x OP a */ ++ struct hlsl_ir_node *tmp = arg1; ++ ++ arg1 = arg2; ++ arg2 = tmp; ++ progress = true; ++ } ++ ++ if (is_op_associative(op, type)) ++ { ++ struct hlsl_ir_expr *e1 = arg1->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg1) : NULL; ++ struct hlsl_ir_expr *e2 = arg2->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg2) : NULL; ++ ++ if (e1 && e1->op == op && e1->operands[0].node->type != HLSL_IR_CONSTANT ++ && e1->operands[1].node->type == HLSL_IR_CONSTANT) ++ { ++ if (arg2->type == HLSL_IR_CONSTANT) ++ { ++ /* (x OP a) OP b -> x OP (a OP b) */ ++ struct hlsl_ir_node *ab; ++ ++ if (!(ab = hlsl_new_binary_expr(ctx, op, e1->operands[1].node, arg2))) ++ return false; ++ list_add_before(&instr->entry, &ab->entry); ++ ++ arg1 = e1->operands[0].node; ++ arg2 = ab; ++ progress = true; ++ } ++ else if (is_op_commutative(op)) ++ { ++ /* (x OP a) OP y -> (x OP y) OP a */ ++ struct hlsl_ir_node *xy; ++ ++ if (!(xy = hlsl_new_binary_expr(ctx, op, e1->operands[0].node, arg2))) ++ return false; ++ list_add_before(&instr->entry, &xy->entry); ++ ++ arg1 = xy; ++ arg2 = e1->operands[1].node; ++ progress = true; ++ } ++ } ++ ++ if (!progress && arg1->type != HLSL_IR_CONSTANT && e2 && e2->op == op ++ && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT) ++ { ++ /* x OP (y OP a) -> (x OP y) OP a */ ++ struct hlsl_ir_node *xy; ++ ++ if (!(xy = hlsl_new_binary_expr(ctx, op, arg1, e2->operands[0].node))) ++ return false; ++ list_add_before(&instr->entry, &xy->entry); ++ ++ arg1 = xy; ++ arg2 = e2->operands[1].node; ++ progress = true; ++ } ++ ++ } ++ ++ if (progress) ++ { ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; ++ struct hlsl_ir_node *res; ++ ++ if (!(res = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) ++ return false; ++ list_add_before(&instr->entry, &res->entry); ++ hlsl_replace_node(instr, res); ++ } ++ ++ return 
progress; ++} ++ + bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct hlsl_constant_value value; +@@ -1560,7 +1705,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst + src = hlsl_ir_constant(swizzle->val.node); + + for (i = 0; i < swizzle->node.data_type->dimx; ++i) +- value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; ++ value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)]; + + if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) + return false; +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index b0e89bededb..cdc0c18466f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -1,5 +1,6 @@ + /* + * Copyright 2023 Conor McCarthy for CodeWeavers ++ * Copyright 2023-2024 Elizabeth Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public +@@ -201,6 +202,14 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 + src->reg.u.immconst_u32[0] = value; + } + ++static void vsir_src_param_init_io(struct vkd3d_shader_src_param *src, ++ enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count) ++{ ++ vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = vsir_swizzle_from_writemask(e->mask); ++} ++ + void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) + { + vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); +@@ -214,6 +223,14 @@ static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_ + src->reg.idx[0].offset = idx; + } + ++static void src_param_init_parameter_vec4(struct 
vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) ++{ ++ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); ++ src->reg.idx[0].offset = idx; ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++} ++ + static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2); +@@ -243,6 +260,14 @@ static void src_param_init_ssa_float(struct vkd3d_shader_src_param *src, unsigne + src->reg.idx[0].offset = idx; + } + ++static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ src->reg.idx[0].offset = idx; ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++} ++ + static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); +@@ -278,6 +303,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader + param->shift = 0; + } + ++static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type, ++ const struct signature_element *e, unsigned int idx_count) ++{ ++ vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = e->mask; ++} ++ + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); +@@ -290,6 +323,14 @@ static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigne + dst->reg.idx[0].offset = idx; + } + ++static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ 
dst->reg.idx[0].offset = idx; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = VKD3DSP_WRITEMASK_ALL; ++} ++ + static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); +@@ -709,6 +750,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, + return VKD3D_OK; + } + ++static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program *program, ++ struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) ++{ ++ switch (ins->declaration.dst.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_PATCHCONST: ++ case VKD3DSPR_INCONTROLPOINT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ break; ++ ++ case VKD3DSPR_PRIMID: ++ case VKD3DSPR_FORKINSTID: ++ case VKD3DSPR_JOININSTID: ++ case VKD3DSPR_THREADID: ++ case VKD3DSPR_THREADGROUPID: ++ case VKD3DSPR_LOCALTHREADID: ++ case VKD3DSPR_LOCALTHREADINDEX: ++ case VKD3DSPR_COVERAGE: ++ case VKD3DSPR_TESSCOORD: ++ case VKD3DSPR_OUTPOINTID: ++ case VKD3DSPR_GSINSTID: ++ case VKD3DSPR_WAVELANECOUNT: ++ case VKD3DSPR_WAVELANEINDEX: ++ bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); ++ break; ++ ++ default: ++ vkd3d_shader_error(ctx->message_context, &ins->location, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Internal compiler error: invalid register type %#x for DCL_INPUT.", ++ ins->declaration.dst.reg.type); ++ return VKD3D_ERROR; ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program, ++ struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) ++{ ++ switch (ins->declaration.dst.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_PATCHCONST: ++ case VKD3DSPR_INCONTROLPOINT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ break; ++ ++ case VKD3DSPR_DEPTHOUT: ++ case VKD3DSPR_SAMPLEMASK: ++ case 
VKD3DSPR_DEPTHOUTGE: ++ case VKD3DSPR_DEPTHOUTLE: ++ case VKD3DSPR_OUTSTENCILREF: ++ bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); ++ break; ++ ++ default: ++ vkd3d_shader_error(ctx->message_context, &ins->location, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Internal compiler error: invalid register type %#x for DCL_OUTPUT.", ++ ins->declaration.dst.reg.type); ++ return VKD3D_ERROR; ++ } ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, + struct vsir_transformation_context *ctx) + { +@@ -743,11 +854,31 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + case VKD3DSIH_DCL_GLOBAL_FLAGS: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + case VKD3DSIH_DCL_THREAD_GROUP: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_shader_instruction_make_nop(ins); + break; + ++ case VKD3DSIH_DCL_INPUT: ++ vsir_program_lower_dcl_input(program, ins, ctx); ++ vkd3d_shader_instruction_make_nop(ins); ++ break; ++ ++ case VKD3DSIH_DCL_OUTPUT: ++ vsir_program_lower_dcl_output(program, ins, ctx); ++ vkd3d_shader_instruction_make_nop(ins); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_SGV: ++ case VKD3DSIH_DCL_INPUT_SIV: ++ case VKD3DSIH_DCL_INPUT_PS: ++ case VKD3DSIH_DCL_INPUT_PS_SGV: ++ case VKD3DSIH_DCL_INPUT_PS_SIV: ++ case VKD3DSIH_DCL_OUTPUT_SIV: ++ vkd3d_shader_instruction_make_nop(ins); ++ break; ++ + case VKD3DSIH_SINCOS: + if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) + return ret; +@@ -847,11 +978,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, + return VKD3D_OK; + } + ++static bool add_signature_element(struct shader_signature *signature, const char *semantic_name, ++ uint32_t semantic_index, uint32_t mask, uint32_t register_index, ++ enum vkd3d_shader_interpolation_mode interpolation_mode) ++{ ++ struct signature_element *new_elements, *e; ++ ++ if (!(new_elements = 
vkd3d_realloc(signature->elements, ++ (signature->element_count + 1) * sizeof(*signature->elements)))) ++ return false; ++ signature->elements = new_elements; ++ e = &signature->elements[signature->element_count++]; ++ memset(e, 0, sizeof(*e)); ++ e->semantic_name = vkd3d_strdup(semantic_name); ++ e->semantic_index = semantic_index; ++ e->sysval_semantic = VKD3D_SHADER_SV_NONE; ++ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ e->register_count = 1; ++ e->mask = mask; ++ e->used_mask = mask; ++ e->register_index = register_index; ++ e->target_location = register_index; ++ e->interpolation_mode = interpolation_mode; ++ return true; ++} ++ + static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) + { + struct shader_signature *signature = &program->output_signature; +- struct signature_element *new_elements, *e; ++ struct signature_element *e; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) + return VKD3D_OK; +@@ -864,22 +1020,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr + return VKD3D_OK; + } + +- if (!(new_elements = vkd3d_realloc(signature->elements, +- (signature->element_count + 1) * sizeof(*signature->elements)))) ++ if (!add_signature_element(signature, "COLOR", 0, VKD3DSP_WRITEMASK_ALL, SM1_COLOR_REGISTER_OFFSET, VKD3DSIM_NONE)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- signature->elements = new_elements; +- e = &signature->elements[signature->element_count++]; +- memset(e, 0, sizeof(*e)); +- e->semantic_name = vkd3d_strdup("COLOR"); +- e->sysval_semantic = VKD3D_SHADER_SV_NONE; +- e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; +- e->register_count = 1; +- e->mask = VKD3DSP_WRITEMASK_ALL; +- e->used_mask = VKD3DSP_WRITEMASK_ALL; +- e->register_index = SM1_COLOR_REGISTER_OFFSET; +- e->target_location = SM1_COLOR_REGISTER_OFFSET; +- e->interpolation_mode = VKD3DSIM_NONE; +- + return VKD3D_OK; + } + +@@ -1034,6 +1176,9 @@ 
static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + + e->target_location = map->input_register_index; + ++ TRACE("Mapping signature index %u (mask %#x) to target location %u (mask %#x).\n", ++ i, e->mask, map->input_register_index, map->input_mask); ++ + if ((input_mask & e->mask) == input_mask) + { + ++subset_varying_count; +@@ -1054,6 +1199,8 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + } + else + { ++ TRACE("Marking signature index %u (mask %#x) as unused.\n", i, e->mask); ++ + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; + } + +@@ -1213,12 +1360,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal + vkd3d_shader_instruction_make_nop(ins); + return; + } +- else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( +- &ins->declaration.dst.reg)) +- { +- vkd3d_shader_instruction_make_nop(ins); +- return; +- } + + if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins)) + return; +@@ -1369,25 +1510,15 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param + } + } + +-static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, +- enum vkd3d_shader_register_type reg_type, unsigned int idx_count) +-{ +- param->write_mask = e->mask; +- param->modifiers = 0; +- param->shift = 0; +- vsir_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); +-} +- + static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, + const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, + const struct vkd3d_shader_location *location) + { + struct vkd3d_shader_instruction *ins; +- struct vkd3d_shader_dst_param *param; + const struct signature_element *e; +- unsigned int i, count; ++ unsigned int i, count = 2; + +- for (i = 0, count 
= 1; i < s->element_count; ++i) ++ for (i = 0; i < s->element_count; ++i) + count += !!s->elements[i].used_mask; + + if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) +@@ -1399,7 +1530,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p + + ins = &normaliser->instructions.elements[dst]; + vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); +- ins->flags = 1; ++ + ++ins; + + for (i = 0; i < s->element_count; ++i) +@@ -1408,26 +1539,35 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p + if (!e->used_mask) + continue; + +- if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) +- { +- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV); +- param = &ins->declaration.register_semantic.reg; +- ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); +- } +- else ++ vsir_instruction_init(ins, location, VKD3DSIH_MOV); ++ ins->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); ++ ins->dst_count = 1; ++ ins->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); ++ ins->src_count = 1; ++ ++ if (!ins->dst || ! 
ins->src) + { +- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT); +- param = &ins->declaration.dst; ++ WARN("Failed to allocate dst/src param.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; + } + +- shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); +- param->reg.idx[0].offset = input_control_point_count; +- param->reg.idx[1].offset = e->register_index; +- param->write_mask = e->mask; ++ vsir_dst_param_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2); ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].reg.idx[0].offset = 0; ++ ins->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; ++ ins->dst[0].reg.idx[1].offset = e->register_index; ++ ++ vsir_src_param_init_io(&ins->src[0], VKD3DSPR_INPUT, e, 2); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].reg.idx[0].offset = 0; ++ ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; ++ ins->src[0].reg.idx[1].offset = e->register_index; + + ++ins; + } + ++ vsir_instruction_init(ins, location, VKD3DSIH_RET); ++ + return VKD3D_OK; + } + +@@ -1442,7 +1582,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + enum vkd3d_result ret; + unsigned int i, j; + +- VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4); + + if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + { +@@ -1545,11 +1685,6 @@ static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *n + return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE; + } + +-static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser) +-{ +- return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +-} +- + static bool shader_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask, unsigned int *element_idx) + { +@@ -1920,41 +2055,26 @@ static 
unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh + { + VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); + +- /* For a relative-addressed register index, move the id up a slot to separate it from the address, +- * because rel_addr can be replaced with a constant offset in some cases. */ +- if (reg->idx[id_idx].rel_addr) +- { +- reg->idx[id_idx + 1].rel_addr = NULL; +- reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset; +- reg->idx[id_idx].offset -= register_index; +- if (id_idx) +- { +- /* idx[id_idx] now contains the array index, which must be moved below the control point id. */ +- struct vkd3d_shader_register_index tmp = reg->idx[id_idx]; +- reg->idx[id_idx] = reg->idx[id_idx - 1]; +- reg->idx[id_idx - 1] = tmp; +- } +- ++id_idx; +- } +- /* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where +- * tessellation level registers are merged into an array because they're an array in SPIR-V. */ +- else +- { +- ++id_idx; +- memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0])); +- reg->idx[0].rel_addr = NULL; +- reg->idx[0].offset = reg->idx[id_idx].offset - register_index; +- } ++ /* Make room for the array index at the front of the array. */ ++ ++id_idx; ++ memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0])); ++ ++ /* The array index inherits the register relative address, but is offsetted ++ * by the signature element register index. */ ++ reg->idx[0].rel_addr = reg->idx[id_idx].rel_addr; ++ reg->idx[0].offset = reg->idx[id_idx].offset - register_index; ++ reg->idx[id_idx].rel_addr = NULL; ++ ++ /* The signature index offset will be fixed in the caller. 
*/ + + return id_idx; + } + +-static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, ++static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, + struct io_normaliser *normaliser) + { + unsigned int id_idx, reg_idx, write_mask, element_idx; + struct vkd3d_shader_register *reg = &dst_param->reg; +- struct vkd3d_shader_dst_param **dcl_params; + const struct shader_signature *signature; + const struct signature_element *e; + +@@ -1970,26 +2090,22 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + /* Convert patch constant outputs to the patch constant register type to avoid the need + * to convert compiler symbols when accessed as inputs in a later stage. */ + reg->type = VKD3DSPR_PATCHCONST; +- dcl_params = normaliser->pc_dcl_params; + } + else + { + signature = normaliser->output_signature; +- dcl_params = normaliser->output_dcl_params; + } + break; + + case VKD3DSPR_PATCHCONST: + reg_idx = reg->idx[reg->idx_count - 1].offset; + signature = normaliser->patch_constant_signature; +- dcl_params = normaliser->pc_dcl_params; + break; + + case VKD3DSPR_COLOROUT: + reg_idx = reg->idx[0].offset; + signature = normaliser->output_signature; + reg->type = VKD3DSPR_OUTPUT; +- dcl_params = normaliser->output_dcl_params; + break; + + case VKD3DSPR_INCONTROLPOINT: +@@ -1997,14 +2113,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + reg_idx = reg->idx[reg->idx_count - 1].offset; + signature = normaliser->input_signature; + reg->type = VKD3DSPR_INPUT; +- dcl_params = normaliser->input_dcl_params; + break; + + case VKD3DSPR_ATTROUT: + reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset; + signature = normaliser->output_signature; + reg->type = VKD3DSPR_OUTPUT; +- dcl_params = normaliser->output_dcl_params; + break; + + case VKD3DSPR_RASTOUT: +@@ -2014,7 +2128,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param 
*dst_par + reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; + signature = normaliser->output_signature; + reg->type = VKD3DSPR_OUTPUT; +- dcl_params = normaliser->output_dcl_params; + /* Fog and point size are scalar, but fxc/d3dcompiler emits a full + * write mask when writing to them. */ + if (reg->idx[0].offset > 0) +@@ -2030,54 +2143,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + vkd3d_unreachable(); + e = &signature->elements[element_idx]; + +- if (is_io_dcl) +- { +- /* Validated in the TPF reader. */ +- VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); +- +- if (dcl_params[element_idx]) +- { +- /* Merge split declarations into a single one. */ +- dcl_params[element_idx]->write_mask |= dst_param->write_mask; +- /* Turn this into a nop. */ +- return false; +- } +- else +- { +- dcl_params[element_idx] = dst_param; +- } +- } +- +- if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) +- { +- if (is_io_dcl) +- { +- /* Emit an array size for the control points for consistency with inputs. */ +- reg->idx[0].offset = normaliser->output_control_point_count; +- } +- else +- { +- /* The control point id param. */ +- VKD3D_ASSERT(reg->idx[0].rel_addr); +- } +- id_idx = 1; +- } +- + if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) +- { +- if (is_io_dcl) +- { +- /* For control point I/O, idx 0 contains the control point count. +- * Ensure it is moved up to the next slot. 
*/ +- reg->idx[id_idx].offset = reg->idx[0].offset; +- reg->idx[0].offset = e->register_count; +- ++id_idx; +- } +- else +- { +- id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); +- } +- } ++ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); + + /* Replace the register index with the signature element index */ + reg->idx[id_idx].offset = element_idx; +@@ -2129,6 +2196,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par + + case VKD3DSPR_OUTCONTROLPOINT: + reg->type = VKD3DSPR_OUTPUT; ++ if (io_normaliser_is_in_fork_or_join_phase(normaliser)) ++ normaliser->use_vocp = true; + /* fall through */ + case VKD3DSPR_OUTPUT: + reg_idx = reg->idx[reg->idx_count - 1].offset; +@@ -2169,40 +2238,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par + static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, + struct io_normaliser *normaliser) + { +- struct vkd3d_shader_register *reg; + unsigned int i; + + switch (ins->opcode) + { +- case VKD3DSIH_DCL_INPUT: +- if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) +- { +- reg = &ins->declaration.dst.reg; +- +- if (reg->type == VKD3DSPR_OUTCONTROLPOINT) +- normaliser->use_vocp = true; +- +- /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their +- * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ +- if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) +- vkd3d_shader_instruction_make_nop(ins); +- else if (reg->type == VKD3DSPR_INCONTROLPOINT) +- reg->type = VKD3DSPR_INPUT; +- } +- /* fall through */ +- case VKD3DSIH_DCL_INPUT_PS: +- case VKD3DSIH_DCL_OUTPUT: +- if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser)) +- vkd3d_shader_instruction_make_nop(ins); +- break; +- case VKD3DSIH_DCL_INPUT_SGV: +- case VKD3DSIH_DCL_INPUT_SIV: +- case VKD3DSIH_DCL_INPUT_PS_SGV: +- case VKD3DSIH_DCL_INPUT_PS_SIV: +- case VKD3DSIH_DCL_OUTPUT_SIV: +- if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser)) +- vkd3d_shader_instruction_make_nop(ins); +- break; + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +@@ -2215,7 +2254,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + if (vsir_instruction_is_dcl(ins)) + break; + for (i = 0; i < ins->dst_count; ++i) +- shader_dst_param_io_normalise(&ins->dst[i], false, normaliser); ++ shader_dst_param_io_normalise(&ins->dst[i], normaliser); + for (i = 0; i < ins->src_count; ++i) + shader_src_param_io_normalise(&ins->src[i], normaliser); + break; +@@ -2275,7 +2314,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + + program->instructions = normaliser.instructions; + program->use_vocp = normaliser.use_vocp; +- program->normalisation_level = VSIR_FULLY_NORMALISED_IO; ++ program->normalisation_level = VSIR_NORMALISED_SM6; + return VKD3D_OK; + } + +@@ -6634,149 +6673,747 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr + return VKD3D_OK; + } + +-struct validation_context ++static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) + { +- struct vkd3d_shader_message_context *message_context; +- const struct vsir_program *program; 
+- size_t instruction_idx; +- struct vkd3d_shader_location null_location; +- bool invalid_instruction_idx; +- enum vkd3d_result status; +- bool dcl_temps_found; +- enum vkd3d_shader_opcode phase; +- bool inside_block; ++ struct shader_signature *signature = &program->input_signature; ++ uint32_t register_idx = 0; + +- struct validation_context_temp_data +- { +- enum vsir_dimension dimension; +- size_t first_seen; +- } *temps; ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; + +- struct validation_context_ssa_data +- { +- enum vsir_dimension dimension; +- enum vkd3d_data_type data_type; +- size_t first_seen; +- uint32_t write_mask; +- uint32_t read_mask; +- size_t first_assigned; +- } *ssas; ++ if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) ++ return VKD3D_OK; + +- enum vkd3d_shader_opcode *blocks; +- size_t depth; +- size_t blocks_capacity; +-}; ++ /* We could check the value and skip this if NONE, but chances are if a ++ * user specifies the fog fragment mode as a parameter, they'll want to ++ * enable it dynamically. Always specifying it (and hence always outputting ++ * it from the VS) avoids an extra VS variant. */ + +-static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, +- enum vkd3d_shader_error error, const char *format, ...) 
+-{ +- struct vkd3d_string_buffer buf; +- va_list args; ++ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) ++ return VKD3D_OK; + +- vkd3d_string_buffer_init(&buf); ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ register_idx = max(register_idx, signature->elements[i].register_index + 1); + +- va_start(args, format); +- vkd3d_string_buffer_vprintf(&buf, format, args); +- va_end(args); ++ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ return VKD3D_OK; ++} + +- if (ctx->invalid_instruction_idx) +- { +- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); +- WARN("VSIR validation error: %s\n", buf.buffer); +- } +- else ++static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, ++ uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, ++ size_t *ret_pos, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ struct vkd3d_shader_location loc = ret->location; ++ uint32_t ssa_factor = program->ssa_count++; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ uint32_t ssa_temp, ssa_temp2; ++ ++ switch (mode) + { +- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; +- vkd3d_shader_error(ctx->message_context, &ins->location, error, +- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); +- WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); +- } ++ case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: ++ /* We generate the following code: ++ * ++ * add sr0, FOG_END, -vFOG.x ++ * mul_sat srFACTOR, sr0, FOG_SCALE ++ */ ++ if 
(!shader_instruction_array_insert_at(&program->instructions, pos, 4)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ *ret_pos = pos + 4; ++ ++ ssa_temp = program->ssa_count++; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_ADD, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VKD3D_DATA_FLOAT); ++ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); ++ ins->src[1].reg.idx[0].offset = fog_signature_idx; ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ins->src[1].modifiers = VKD3DSPSM_NEG; ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); ++ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; ++ src_param_init_ssa_float(&ins->src[0], ssa_temp); ++ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); ++ break; + +- vkd3d_string_buffer_cleanup(&buf); ++ case VKD3D_SHADER_FOG_FRAGMENT_EXP: ++ /* We generate the following code: ++ * ++ * mul sr0, FOG_SCALE, vFOG.x ++ * exp_sat srFACTOR, -sr0 ++ */ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ *ret_pos = pos + 4; + +- if (!ctx->status) +- ctx->status = VKD3D_ERROR_INVALID_SHADER; +-} ++ ssa_temp = program->ssa_count++; + +-static void vsir_validate_register_without_indices(struct validation_context *ctx, +- const struct vkd3d_shader_register *reg) +-{ +- if (reg->idx_count != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, +- "Invalid index count %u for a register of type %#x.", +- reg->idx_count, reg->type); +-} ++ ins = &program->instructions.elements[pos]; + +-static void vsir_validate_io_register(struct validation_context *ctx, +- const struct vkd3d_shader_register 
*reg) +-{ +- const struct shader_signature *signature; +- bool has_control_point = false; ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); ++ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); ++ ins->src[1].reg.idx[0].offset = fog_signature_idx; ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + +- switch (reg->type) +- { +- case VKD3DSPR_INPUT: +- signature = &ctx->program->input_signature; ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); ++ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; ++ src_param_init_ssa_float(&ins->src[0], ssa_temp); ++ ins->src[0].modifiers = VKD3DSPSM_NEG; ++ break; + +- switch (ctx->program->shader_version.type) +- { +- case VKD3D_SHADER_TYPE_GEOMETRY: +- case VKD3D_SHADER_TYPE_HULL: +- case VKD3D_SHADER_TYPE_DOMAIN: +- has_control_point = true; +- break; +- +- default: +- break; +- } ++ case VKD3D_SHADER_FOG_FRAGMENT_EXP2: ++ /* We generate the following code: ++ * ++ * mul sr0, FOG_SCALE, vFOG.x ++ * mul sr1, sr0, sr0 ++ * exp_sat srFACTOR, -sr1 ++ */ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ *ret_pos = pos + 5; ++ ++ ssa_temp = program->ssa_count++; ++ ssa_temp2 = program->ssa_count++; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); ++ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); ++ ins->src[1].reg.idx[0].offset = fog_signature_idx; ++ ins->src[1].reg.dimension = 
VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); ++ src_param_init_ssa_float(&ins->src[0], ssa_temp); ++ src_param_init_ssa_float(&ins->src[1], ssa_temp); ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); ++ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; ++ src_param_init_ssa_float(&ins->src[0], ssa_temp2); ++ ins->src[0].modifiers = VKD3DSPSM_NEG; + break; + +- case VKD3DSPR_OUTPUT: +- switch (ctx->program->shader_version.type) ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ /* We generate the following code: ++ * ++ * add sr0, FRAG_COLOUR, -FOG_COLOUR ++ * mad oC0, sr0, srFACTOR, FOG_COLOUR ++ */ ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_ADD, 1, 2); ++ dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++); ++ src_param_init_temp_float4(&ins->src[0], colour_temp); ++ src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); ++ ins->src[1].modifiers = VKD3DSPSM_NEG; ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MAD, 1, 3); ++ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, colour_signature_idx, ++ program->output_signature.elements[colour_signature_idx].mask); ++ src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1); ++ src_param_init_ssa_float(&ins->src[1], ssa_factor); ++ src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_insert_fragment_fog(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_message_context *message_context = ctx->message_context; ++ uint32_t colour_signature_idx, fog_signature_idx, colour_temp; ++ const 
struct vkd3d_shader_parameter1 *mode_parameter = NULL; ++ static const struct vkd3d_shader_location no_loc; ++ const struct signature_element *fog_element; ++ enum vkd3d_shader_fog_fragment_mode mode; ++ struct vkd3d_shader_instruction *ins; ++ size_t new_pos; ++ int ret; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)) ++ return VKD3D_OK; ++ ++ if (!(mode_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE))) ++ return VKD3D_OK; ++ ++ if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ mode = mode_parameter->u.immediate_constant.u.u32; ++ ++ if (mode == VKD3D_SHADER_FOG_FRAGMENT_NONE) ++ return VKD3D_OK; ++ ++ /* Should have been added by vsir_program_add_fog_input(). */ ++ if (!(fog_element = vsir_signature_find_element_by_name(&program->input_signature, "FOG", 0))) ++ { ++ ERR("Fog input not found.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ fog_signature_idx = fog_element - program->input_signature.elements; ++ ++ /* We're going to be reading from the output, so we need to go ++ * through the whole shader and convert it to a temp. 
*/ ++ colour_temp = program->temp_count++; ++ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx, ++ colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) ++ return ret; ++ i = new_pos; ++ continue; ++ } ++ ++ for (size_t j = 0; j < ins->dst_count; ++j) ++ { ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; ++ ++ /* Note we run after I/O normalization. */ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) + { +- case VKD3D_SHADER_TYPE_HULL: +- if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE +- || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) +- { +- signature = &ctx->program->output_signature; +- has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; +- } +- else +- { +- signature = &ctx->program->patch_constant_signature; +- } +- break; ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = colour_temp; ++ } ++ } ++ } + +- default: +- signature = &ctx->program->output_signature; +- break; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct shader_signature *signature = &program->output_signature; ++ const struct vkd3d_shader_parameter1 *source_parameter; ++ uint32_t register_idx = 0; ++ ++ if (!is_pre_rasterization_shader(program->shader_version.type)) ++ return VKD3D_OK; ++ ++ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) ++ return VKD3D_OK; ++ ++ if (source_parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ enum vkd3d_shader_fog_source source = source_parameter->u.immediate_constant.u.u32; ++ ++ if (source == 
VKD3D_SHADER_FOG_SOURCE_FOG) ++ return VKD3D_OK; ++ ++ if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W ++ && !vsir_signature_find_element_by_name(signature, "COLOR", 1)) ++ return VKD3D_OK; ++ } ++ ++ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) ++ return VKD3D_OK; ++ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ register_idx = max(register_idx, signature->elements[i].register_index + 1); ++ ++ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp, ++ uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos) ++{ ++ const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ /* Write the fog output. */ ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); ++ src_param_init_temp_float4(&ins->src[0], temp); ++ if (source == VKD3D_SHADER_FOG_SOURCE_Z) ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); ++ else /* Position or specular W. */ ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ ++ins; ++ ++ /* Write the position or specular output. 
*/ ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), ++ source_signature_idx, e->mask); ++ src_param_init_temp_float4(&ins->src[0], temp); ++ ++ins; ++ ++ *ret_pos = pos + 2; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_message_context *message_context = ctx->message_context; ++ const struct vkd3d_shader_parameter1 *source_parameter = NULL; ++ uint32_t fog_signature_idx, source_signature_idx, temp; ++ static const struct vkd3d_shader_location no_loc; ++ enum vkd3d_shader_fog_source source; ++ const struct signature_element *e; ++ ++ if (!is_pre_rasterization_shader(program->shader_version.type)) ++ return VKD3D_OK; ++ ++ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) ++ return VKD3D_OK; ++ ++ if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported fog source parameter type %#x.", source_parameter->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid fog source parameter data type %#x.", source_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ source = source_parameter->u.immediate_constant.u.u32; ++ ++ TRACE("Fog source %#x.\n", source); ++ ++ if (source == VKD3D_SHADER_FOG_SOURCE_FOG) ++ return VKD3D_OK; ++ ++ if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W) ++ { ++ if (program->has_fog || !(e = vsir_signature_find_element_by_name(&program->output_signature, "COLOR", 1))) ++ return VKD3D_OK; ++ 
source_signature_idx = e - program->output_signature.elements; ++ } ++ else ++ { ++ if (!vsir_signature_find_sysval(&program->output_signature, ++ VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx)) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, ++ VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ } ++ ++ if (!(e = vsir_signature_find_element_by_name(&program->output_signature, "FOG", 0))) ++ { ++ ERR("Fog output not found.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ fog_signature_idx = e - program->output_signature.elements; ++ ++ temp = program->temp_count++; ++ ++ /* Insert a fog write before each ret, and convert either specular or ++ * position output to a temp. */ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ size_t new_pos; ++ int ret; ++ ++ if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp, ++ fog_signature_idx, source_signature_idx, &new_pos)) < 0) ++ return ret; ++ i = new_pos; ++ continue; ++ } ++ ++ for (size_t j = 0; j < ins->dst_count; ++j) ++ { ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; ++ ++ /* Note we run after I/O normalization. 
*/ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == source_signature_idx) ++ { ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = temp; + } +- break; ++ } ++ } + +- case VKD3DSPR_INCONTROLPOINT: +- signature = &ctx->program->input_signature; +- has_control_point = true; +- break; ++ program->has_fog = true; + +- case VKD3DSPR_OUTCONTROLPOINT: +- signature = &ctx->program->output_signature; +- has_control_point = true; +- break; ++ return VKD3D_OK; ++} + +- case VKD3DSPR_PATCHCONST: +- signature = &ctx->program->patch_constant_signature; +- break; ++struct validation_context ++{ ++ struct vkd3d_shader_message_context *message_context; ++ const struct vsir_program *program; ++ size_t instruction_idx; ++ struct vkd3d_shader_location null_location; ++ bool invalid_instruction_idx; ++ enum vkd3d_result status; ++ bool dcl_temps_found; ++ enum vkd3d_shader_opcode phase; ++ bool inside_block; ++ ++ struct validation_context_temp_data ++ { ++ enum vsir_dimension dimension; ++ size_t first_seen; ++ } *temps; ++ ++ struct validation_context_ssa_data ++ { ++ enum vsir_dimension dimension; ++ enum vkd3d_data_type data_type; ++ size_t first_seen; ++ uint32_t write_mask; ++ uint32_t read_mask; ++ size_t first_assigned; ++ } *ssas; ++ ++ enum vkd3d_shader_opcode *blocks; ++ size_t depth; ++ size_t blocks_capacity; ++ ++ unsigned int outer_tess_idxs[4]; ++ unsigned int inner_tess_idxs[2]; ++ ++ struct validation_context_signature_data ++ { ++ struct validation_context_signature_stream_data ++ { ++ struct validation_context_signature_register_data ++ { ++ struct validation_context_signature_component_data ++ { ++ const struct signature_element *element; ++ } components[VKD3D_VEC4_SIZE]; ++ } registers[MAX_REG_OUTPUT]; ++ } streams[VKD3D_MAX_STREAM_COUNT]; ++ } input_signature_data, output_signature_data, patch_constant_signature_data; ++}; ++ ++static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, ++ enum 
vkd3d_shader_error error, const char *format, ...) ++{ ++ struct vkd3d_string_buffer buf; ++ va_list args; ++ ++ vkd3d_string_buffer_init(&buf); ++ ++ va_start(args, format); ++ vkd3d_string_buffer_vprintf(&buf, format, args); ++ va_end(args); ++ ++ if (ctx->invalid_instruction_idx) ++ { ++ vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); ++ WARN("VSIR validation error: %s\n", buf.buffer); ++ } ++ else ++ { ++ const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; ++ vkd3d_shader_error(ctx->message_context, &ins->location, error, ++ "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); ++ WARN("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); ++ } ++ ++ vkd3d_string_buffer_cleanup(&buf); ++ ++ if (!ctx->status) ++ ctx->status = VKD3D_ERROR_INVALID_SHADER; ++} ++ ++static void vsir_validate_register_without_indices(struct validation_context *ctx, ++ const struct vkd3d_shader_register *reg) ++{ ++ if (reg->idx_count != 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u for a register of type %#x.", ++ reg->idx_count, reg->type); ++} ++ ++enum vsir_signature_type ++{ ++ SIGNATURE_TYPE_INPUT, ++ SIGNATURE_TYPE_OUTPUT, ++ SIGNATURE_TYPE_PATCH_CONSTANT, ++}; ++ ++enum vsir_io_reg_type ++{ ++ REG_V, ++ REG_O, ++ REG_VPC, ++ REG_VICP, ++ REG_VOCP, ++ REG_COUNT, ++}; ++ ++enum vsir_phase ++{ ++ PHASE_NONE, ++ PHASE_CONTROL_POINT, ++ PHASE_FORK, ++ PHASE_JOIN, ++ PHASE_COUNT, ++}; ++ ++struct vsir_io_register_data ++{ ++ unsigned int flags; ++ enum vsir_signature_type signature_type; ++ const struct shader_signature *signature; ++ unsigned int control_point_count; ++}; ++ ++enum ++{ ++ INPUT_BIT = (1u << 0), ++ OUTPUT_BIT = (1u << 1), ++ CONTROL_POINT_BIT = (1u << 2), ++}; ++ ++static const struct vsir_io_register_data vsir_sm4_io_register_data ++ 
[VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] = ++{ ++ [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] = ++ { ++ [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ /* According to MSDN, vpc is not allowed in fork phases. However we ++ * don't really distinguish between fork and join phases, so we ++ * allow it. 
*/ ++ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ }, ++ [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] = ++ { ++ [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_VOCP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ }, ++ [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] = ++ { ++ [REG_VICP] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++}; ++ ++static const struct vsir_io_register_data vsir_sm6_io_register_data ++ [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT] = ++{ ++ [VKD3D_SHADER_TYPE_PIXEL][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_VERTEX][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_GEOMETRY][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_HULL][PHASE_CONTROL_POINT] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {OUTPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++ [VKD3D_SHADER_TYPE_HULL][PHASE_FORK] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ }, ++ [VKD3D_SHADER_TYPE_HULL][PHASE_JOIN] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_O] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ [REG_VPC] = {INPUT_BIT | OUTPUT_BIT, 
SIGNATURE_TYPE_PATCH_CONSTANT}, ++ }, ++ [VKD3D_SHADER_TYPE_DOMAIN][PHASE_NONE] = ++ { ++ [REG_V] = {INPUT_BIT | CONTROL_POINT_BIT, SIGNATURE_TYPE_INPUT}, ++ [REG_VPC] = {INPUT_BIT, SIGNATURE_TYPE_PATCH_CONSTANT}, ++ [REG_O] = {OUTPUT_BIT, SIGNATURE_TYPE_OUTPUT}, ++ }, ++}; ++ ++static const bool vsir_get_io_register_data(struct validation_context *ctx, ++ enum vkd3d_shader_register_type register_type, struct vsir_io_register_data *data) ++{ ++ const struct vsir_io_register_data (*signature_register_data) ++ [VKD3D_SHADER_TYPE_GRAPHICS_COUNT][PHASE_COUNT][REG_COUNT]; ++ enum vsir_io_reg_type io_reg_type; ++ enum vsir_phase phase; ++ ++ if (ctx->program->shader_version.type >= ARRAY_SIZE(*signature_register_data)) ++ return NULL; ++ ++ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) ++ signature_register_data = &vsir_sm6_io_register_data; ++ else ++ signature_register_data = &vsir_sm4_io_register_data; ++ ++ switch (register_type) ++ { ++ case VKD3DSPR_INPUT: io_reg_type = REG_V; break; ++ case VKD3DSPR_OUTPUT: io_reg_type = REG_O; break; ++ case VKD3DSPR_INCONTROLPOINT: io_reg_type = REG_VICP; break; ++ case VKD3DSPR_OUTCONTROLPOINT: io_reg_type = REG_VOCP; break; ++ case VKD3DSPR_PATCHCONST: io_reg_type = REG_VPC; break; ++ ++ default: ++ return NULL; ++ } ++ ++ switch (ctx->phase) ++ { ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: phase = PHASE_CONTROL_POINT; break; ++ case VKD3DSIH_HS_FORK_PHASE: phase = PHASE_FORK; break; ++ case VKD3DSIH_HS_JOIN_PHASE: phase = PHASE_JOIN; break; ++ case VKD3DSIH_INVALID: phase = PHASE_NONE; break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ *data = (*signature_register_data)[ctx->program->shader_version.type][phase][io_reg_type]; ++ ++ if (!(data->flags & (INPUT_BIT | OUTPUT_BIT))) ++ return false; ++ ++ /* VSIR_NORMALISED_HULL_CONTROL_POINT_IO differs from VSIR_NORMALISED_SM4 ++ * for just a single flag. So we don't keep a whole copy of it, but just ++ * patch SM4 when needed. 
*/ ++ if (ctx->program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO ++ && ctx->program->shader_version.type == VKD3D_SHADER_TYPE_HULL ++ && phase == PHASE_CONTROL_POINT && io_reg_type == REG_O) ++ { ++ VKD3D_ASSERT(!(data->flags & CONTROL_POINT_BIT)); ++ data->flags |= CONTROL_POINT_BIT; ++ } ++ ++ switch (data->signature_type) ++ { ++ case SIGNATURE_TYPE_INPUT: ++ data->signature = &ctx->program->input_signature; ++ data->control_point_count = ctx->program->input_control_point_count; ++ return true; ++ ++ case SIGNATURE_TYPE_OUTPUT: ++ data->signature = &ctx->program->output_signature; ++ data->control_point_count = ctx->program->output_control_point_count; ++ return true; ++ ++ case SIGNATURE_TYPE_PATCH_CONSTANT: ++ data->signature = &ctx->program->patch_constant_signature; ++ return true; + + default: + vkd3d_unreachable(); + } ++} ++ ++static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) ++{ ++ unsigned int control_point_index, control_point_count; ++ const struct shader_signature *signature; ++ struct vsir_io_register_data io_reg_data; ++ bool has_control_point; ++ ++ if (!vsir_get_io_register_data(ctx, reg->type, &io_reg_data)) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid usage of register type %#x.", reg->type); ++ return; ++ } ++ ++ signature = io_reg_data.signature; ++ has_control_point = io_reg_data.flags & CONTROL_POINT_BIT; ++ control_point_count = io_reg_data.control_point_count; + +- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) ++ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6) + { + /* Indices are [register] or [control point, register]. Both are + * allowed to have a relative address. 
*/ + unsigned int expected_idx_count = 1 + !!has_control_point; + ++ control_point_index = 0; ++ + if (reg->idx_count != expected_idx_count) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, +@@ -6795,7 +7432,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, + /* If the signature element is not an array, indices are + * [signature] or [control point, signature]. If the signature + * element is an array, indices are [array, signature] or +- * [control point, array, signature]. In any case `signature' is ++ * [array, control point, signature]. In any case `signature' is + * not allowed to have a relative address, while the others are. + */ + if (reg->idx_count < 1) +@@ -6829,6 +7466,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, + is_array = true; + + expected_idx_count = 1 + !!has_control_point + !!is_array; ++ control_point_index = !!is_array; + + if (reg->idx_count != expected_idx_count) + { +@@ -6837,7 +7475,18 @@ static void vsir_validate_io_register(struct validation_context *ctx, + reg->idx_count, reg->type); + return; + } ++ ++ if (is_array && !reg->idx[0].rel_addr && reg->idx[0].offset >= element->register_count) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Array index %u exceeds the signature element register count %u in a register of type %#x.", ++ reg->idx[0].offset, element->register_count, reg->type); + } ++ ++ if (has_control_point && !reg->idx[control_point_index].rel_addr ++ && reg->idx[control_point_index].offset >= control_point_count) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Control point index %u exceeds the control point count %u in a register of type %#x.", ++ reg->idx[control_point_index].offset, control_point_count, reg->type); + } + + static void vsir_validate_temp_register(struct validation_context *ctx, +@@ -7143,8 +7792,26 @@ static void vsir_validate_register(struct validation_context *ctx, + for (i = 0; i < 
min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) + { + const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; +- if (reg->idx[i].rel_addr) ++ if (param) ++ { + vsir_validate_src_param(ctx, param); ++ ++ switch (param->reg.type) ++ { ++ case VKD3DSPR_TEMP: ++ case VKD3DSPR_SSA: ++ case VKD3DSPR_ADDR: ++ case VKD3DSPR_LOOP: ++ case VKD3DSPR_OUTPOINTID: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x for a relative address parameter.", ++ param->reg.type); ++ break; ++ } ++ } + } + + switch (reg->type) +@@ -7185,6 +7852,10 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_register_without_indices(ctx, reg); + break; + ++ case VKD3DSPR_PRIMID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_NULL: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -7201,6 +7872,18 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_uav_register(ctx, reg); + break; + ++ case VKD3DSPR_OUTPOINTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_FORKINSTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_JOININSTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_INCONTROLPOINT: + vsir_validate_io_register(ctx, reg); + break; +@@ -7213,6 +7896,38 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_io_register(ctx, reg); + break; + ++ case VKD3DSPR_TESSCOORD: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_THREADID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_THREADGROUPID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_LOCALTHREADID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_LOCALTHREADINDEX: ++ 
vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_COVERAGE: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_SAMPLEMASK: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_GSINSTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_DEPTHOUTGE: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -7221,15 +7936,37 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_register_without_indices(ctx, reg); + break; + ++ case VKD3DSPR_OUTSTENCILREF: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_SSA: + vsir_validate_ssa_register(ctx, reg); + break; + ++ case VKD3DSPR_WAVELANECOUNT: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_WAVELANEINDEX: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + default: + break; + } + } + ++static void vsir_validate_io_dst_param(struct validation_context *ctx, ++ const struct vkd3d_shader_dst_param *dst) ++{ ++ struct vsir_io_register_data io_reg_data; ++ ++ if (!vsir_get_io_register_data(ctx, dst->reg.type, &io_reg_data) || !(io_reg_data.flags & OUTPUT_BIT)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x used as destination parameter.", dst->reg.type); ++} ++ + static void vsir_validate_dst_param(struct validation_context *ctx, + const struct vkd3d_shader_dst_param *dst) + { +@@ -7304,15 +8041,28 @@ static void vsir_validate_dst_param(struct validation_context *ctx, + case VKD3DSPR_IMMCONST64: + case VKD3DSPR_SAMPLER: + case VKD3DSPR_RESOURCE: +- case VKD3DSPR_INPUT: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid %#x register used as destination parameter.", dst->reg.type); + break; + ++ case VKD3DSPR_INPUT: ++ vsir_validate_io_dst_param(ctx, dst); ++ break; ++ ++ case VKD3DSPR_OUTPUT: ++ 
vsir_validate_io_dst_param(ctx, dst); ++ break; ++ ++ case VKD3DSPR_INCONTROLPOINT: ++ vsir_validate_io_dst_param(ctx, dst); ++ break; ++ ++ case VKD3DSPR_OUTCONTROLPOINT: ++ vsir_validate_io_dst_param(ctx, dst); ++ break; ++ + case VKD3DSPR_PATCHCONST: +- if (ctx->program->shader_version.type != VKD3D_SHADER_TYPE_HULL) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "PATCHCONST register used as destination parameters are only allowed in Hull Shaders."); ++ vsir_validate_io_dst_param(ctx, dst); + break; + + default: +@@ -7420,13 +8170,6 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, + return true; + } + +-enum vsir_signature_type +-{ +- SIGNATURE_TYPE_INPUT, +- SIGNATURE_TYPE_OUTPUT, +- SIGNATURE_TYPE_PATCH_CONSTANT, +-}; +- + static const char * const signature_type_names[] = + { + [SIGNATURE_TYPE_INPUT] = "input", +@@ -7466,17 +8209,32 @@ sysval_validation_data[] = + }; + + static void vsir_validate_signature_element(struct validation_context *ctx, +- const struct shader_signature *signature, enum vsir_signature_type signature_type, +- unsigned int idx) ++ const struct shader_signature *signature, struct validation_context_signature_data *signature_data, ++ enum vsir_signature_type signature_type, unsigned int idx) + { ++ enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; ++ bool integer_type = false, is_outer = false, is_gs_output, require_index = true; + const char *signature_type_name = signature_type_names[signature_type]; + const struct signature_element *element = &signature->elements[idx]; +- bool integer_type = false; ++ unsigned int semantic_index_max = 0, i, j; + + if (element->register_count == 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); + ++ if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT ++ || MAX_REG_OUTPUT - 
element->register_index < element->register_count)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid register index %u and count %u.", ++ idx, signature_type_name, element->register_index, element->register_count); ++ ++ is_gs_output = ctx->program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY ++ && signature_type == SIGNATURE_TYPE_OUTPUT; ++ if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || (element->stream_index != 0 && !is_gs_output)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid stream index %u.", ++ idx, signature_type_name, element->stream_index); ++ + if (element->mask == 0 || (element->mask & ~0xf)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); +@@ -7486,33 +8244,27 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + "element %u of %s signature: Non-contiguous mask %#x.", + idx, signature_type_name, element->mask); + +- /* Here we'd likely want to validate that the usage mask is a subset of the +- * signature mask. Unfortunately the D3DBC parser sometimes violates this. +- * For example I've seen a shader like this: +- * ps_3_0 +- * [...] +- * dcl_texcoord0 v0 +- * [...] +- * texld r2.xyzw, v0.xyzw, s1.xyzw +- * [...] +- * +- * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to +- * compute the signature mask, but the texld instruction apparently uses all +- * the components. Of course the last two components are ignored, but +- * formally they seem to be used. So we end up with a signature element with +- * mask .xy and usage mask .xyzw. +- * +- * The correct fix would probably be to make the D3DBC parser aware of which +- * components are really used for each instruction, but that would take some +- * time. 
*/ +- if (element->used_mask & ~0xf) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, +- "element %u of %s signature: Invalid usage mask %#x.", +- idx, signature_type_name, element->used_mask); ++ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM4) ++ { ++ if ((element->used_mask & element->mask) != element->used_mask) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid usage mask %#x with mask %#x.", ++ idx, signature_type_name, element->used_mask, element->mask); ++ } ++ else ++ { ++ if (element->used_mask & ~0xf) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid usage mask %#x.", ++ idx, signature_type_name, element->used_mask); ++ } + + switch (element->sysval_semantic) + { + case VKD3D_SHADER_SV_NONE: ++ case VKD3D_SHADER_SV_TARGET: ++ break; ++ + case VKD3D_SHADER_SV_POSITION: + case VKD3D_SHADER_SV_CLIP_DISTANCE: + case VKD3D_SHADER_SV_CULL_DISTANCE: +@@ -7523,18 +8275,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_SAMPLE_INDEX: ++ case VKD3D_SHADER_SV_DEPTH: ++ case VKD3D_SHADER_SV_COVERAGE: ++ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: ++ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: ++ case VKD3D_SHADER_SV_STENCIL_REF: ++ require_index = false; ++ break; ++ + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; ++ semantic_index_max = 4; ++ is_outer = true; ++ break; ++ + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; ++ semantic_index_max = 2; ++ is_outer = false; ++ break; ++ + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; ++ semantic_index_max = 3; ++ is_outer = true; ++ break; ++ + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: ++ 
expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; ++ semantic_index_max = 1; ++ is_outer = false; ++ break; ++ + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: +- case VKD3D_SHADER_SV_TARGET: +- case VKD3D_SHADER_SV_DEPTH: +- case VKD3D_SHADER_SV_COVERAGE: +- case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: +- case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: +- case VKD3D_SHADER_SV_STENCIL_REF: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; ++ semantic_index_max = 2; ++ is_outer = true; + break; + + default: +@@ -7544,6 +8321,43 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + break; + } + ++ if (require_index && element->register_index == UINT_MAX) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: System value semantic %#x requires a register index.", ++ idx, signature_type_name, element->sysval_semantic); ++ ++ if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ { ++ if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: System value semantic %#x is only valid " ++ "in the patch constant signature.", ++ idx, signature_type_name, element->sysval_semantic); ++ ++ if (ctx->program->tess_domain != expected_tess_domain) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", ++ idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); ++ ++ if (element->semantic_index >= semantic_index_max) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", ++ idx, signature_type_name, element->semantic_index, element->sysval_semantic); ++ } ++ else ++ { ++ unsigned int *idx_pos = &(is_outer ? 
ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; ++ ++ if (*idx_pos != ~0u) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", ++ idx, signature_type_name, element->semantic_index, element->sysval_semantic); ++ else ++ *idx_pos = idx; ++ } ++ } ++ + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) + { + const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; +@@ -7622,6 +8436,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", + idx, signature_type_name, element->interpolation_mode); ++ ++ if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || !require_index) ++ return; ++ ++ for (i = element->register_index; i < MAX_REG_OUTPUT ++ && i - element->register_index < element->register_count; ++i) ++ { ++ struct validation_context_signature_stream_data *stream_data = &signature_data->streams[element->stream_index]; ++ struct validation_context_signature_register_data *register_data = &stream_data->registers[i]; ++ ++ for (j = 0; j < VKD3D_VEC4_SIZE; ++j) ++ { ++ struct validation_context_signature_component_data *component_data = ®ister_data->components[j]; ++ ++ if (!(element->mask & (1u << j))) ++ continue; ++ ++ if (!component_data->element) ++ component_data->element = element; ++ else ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Conflict with element %zu.", ++ idx, signature_type_name, component_data->element - signature->elements); ++ } ++ } + } + + static const unsigned int allowed_signature_phases[] = +@@ -7631,8 +8470,8 @@ static const unsigned int allowed_signature_phases[] = + [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, + 
}; + +-static void vsir_validate_signature(struct validation_context *ctx, +- const struct shader_signature *signature, enum vsir_signature_type signature_type) ++static void vsir_validate_signature(struct validation_context *ctx, const struct shader_signature *signature, ++ struct validation_context_signature_data *signature_data, enum vsir_signature_type signature_type) + { + unsigned int i; + +@@ -7642,7 +8481,110 @@ static void vsir_validate_signature(struct validation_context *ctx, + "Unexpected %s signature.", signature_type_names[signature_type]); + + for (i = 0; i < signature->element_count; ++i) +- vsir_validate_signature_element(ctx, signature, signature_type, i); ++ vsir_validate_signature_element(ctx, signature, signature_data, signature_type, i); ++ ++ if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) ++ { ++ const struct signature_element *first_element, *element; ++ unsigned int expected_outer_count = 0; ++ unsigned int expected_inner_count = 0; ++ ++ switch (ctx->program->tess_domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ expected_outer_count = 4; ++ expected_inner_count = 2; ++ break; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ expected_outer_count = 3; ++ expected_inner_count = 1; ++ break; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ expected_outer_count = 2; ++ expected_inner_count = 0; ++ break; ++ ++ default: ++ break; ++ } ++ ++ /* After I/O normalisation tessellation factors are merged in a single array. 
*/ ++ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) ++ { ++ expected_outer_count = min(1, expected_outer_count); ++ expected_inner_count = min(1, expected_inner_count); ++ } ++ ++ first_element = NULL; ++ for (i = 0; i < expected_outer_count; ++i) ++ { ++ if (ctx->outer_tess_idxs[i] == ~0u) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Missing outer system value semantic %u.", i); ++ } ++ else ++ { ++ element = &signature->elements[ctx->outer_tess_idxs[i]]; ++ ++ if (!first_element) ++ { ++ first_element = element; ++ continue; ++ } ++ ++ if (element->register_index != first_element->register_index + i) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid register index %u for outer system value semantic %u, expected %u.", ++ element->register_index, i, first_element->register_index + i); ++ } ++ ++ if (element->mask != first_element->mask) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid mask %#x for outer system value semantic %u, expected %#x.", ++ element->mask, i, first_element->mask); ++ } ++ } ++ } ++ ++ first_element = NULL; ++ for (i = 0; i < expected_inner_count; ++i) ++ { ++ if (ctx->inner_tess_idxs[i] == ~0u) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Missing inner system value semantic %u.", i); ++ } ++ else ++ { ++ element = &signature->elements[ctx->inner_tess_idxs[i]]; ++ ++ if (!first_element) ++ { ++ first_element = element; ++ continue; ++ } ++ ++ if (element->register_index != first_element->register_index + i) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid register index %u for inner system value semantic %u, expected %u.", ++ element->register_index, i, first_element->register_index + i); ++ } ++ ++ if (element->mask != first_element->mask) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid mask %#x for inner system value semantic %u, 
expected %#x.", ++ element->mask, i, first_element->mask); ++ } ++ } ++ } ++ } + } + + static const char *name_from_cf_type(enum vsir_control_flow_type type) +@@ -7754,6 +8696,206 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, + instruction->declaration.max_tessellation_factor); + } + ++static void vsir_validate_dcl_index_range(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ unsigned int i, j, base_register_idx, effective_write_mask = 0, control_point_count, first_component = UINT_MAX; ++ const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; ++ enum vkd3d_shader_sysval_semantic sysval = ~0u; ++ const struct shader_signature *signature; ++ struct vsir_io_register_data io_reg_data; ++ bool has_control_point; ++ ++ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, ++ "DCL_INDEX_RANGE is not allowed with fully normalised input/output."); ++ return; ++ } ++ ++ if (range->dst.modifiers != VKD3DSPDM_NONE) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, ++ "Invalid modifier %#x on a DCL_INDEX_RANGE destination parameter.", range->dst.modifiers); ++ ++ if (range->dst.shift != 0) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, ++ "Invalid shift %u on a DCL_INDEX_RANGE destination parameter.", range->dst.shift); ++ ++ if (!vsir_get_io_register_data(ctx, range->dst.reg.type, &io_reg_data)) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in DCL_INDEX_RANGE instruction.", ++ range->dst.reg.type); ++ return; ++ } ++ ++ signature = io_reg_data.signature; ++ has_control_point = io_reg_data.flags & CONTROL_POINT_BIT; ++ control_point_count = io_reg_data.control_point_count; ++ ++ if (range->dst.reg.idx_count != 1 + !!has_control_point) ++ { ++ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, ++ "Invalid index count %u in DCL_INDEX_RANGE instruction.", ++ range->dst.reg.idx_count); ++ return; ++ } ++ ++ if (range->dst.reg.idx[0].rel_addr || (has_control_point && range->dst.reg.idx[1].rel_addr)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid relative address in DCL_INDEX_RANGE instruction."); ++ ++ if (has_control_point) ++ { ++ if (range->dst.reg.idx[0].offset != control_point_count) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid control point index %u in DCL_INDEX_RANGE instruction, expected %u.", ++ range->dst.reg.idx[0].offset, control_point_count); ++ } ++ ++ base_register_idx = range->dst.reg.idx[1].offset; ++ } ++ else ++ { ++ base_register_idx = range->dst.reg.idx[0].offset; ++ } ++ ++ if (range->register_count < 2) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE, ++ "Invalid register count %u in DCL_INDEX_RANGE instruction, expected at least 2.", ++ range->register_count); ++ return; ++ } ++ ++ /* Check that for each register in the range the write mask intersects at ++ * most one (and possibly zero) signature elements. Keep track of the union ++ * of all signature element masks. 
*/ ++ for (i = 0; i < range->register_count; ++i) ++ { ++ bool found = false; ++ ++ for (j = 0; j < signature->element_count; ++j) ++ { ++ const struct signature_element *element = &signature->elements[j]; ++ ++ if (base_register_idx + i != element->register_index || !(range->dst.write_mask & element->mask)) ++ continue; ++ ++ if (found) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.", ++ range->dst.write_mask); ++ ++ found = true; ++ ++ if (first_component == UINT_MAX) ++ first_component = vsir_write_mask_get_component_idx(element->mask); ++ else if (first_component != vsir_write_mask_get_component_idx(element->mask)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Signature masks are not left-aligned within a DCL_INDEX_RANGE."); ++ ++ effective_write_mask |= element->mask; ++ } ++ } ++ ++ /* Check again to have at most one intersection for each register, but this ++ * time using the effective write mask. Also check that we have stabilized, ++ * i.e., the effective write mask now contains all the signature element ++ * masks. This important for being able to merge all the signature elements ++ * in a single one without conflicts (there is no hard reason why we ++ * couldn't support an effective write mask that stabilizes after more ++ * iterations, but the code would be more complicated, and we avoid that if ++ * we can). 
*/ ++ for (i = 0; i < range->register_count; ++i) ++ { ++ bool found = false; ++ ++ for (j = 0; j < signature->element_count; ++j) ++ { ++ const struct signature_element *element = &signature->elements[j]; ++ ++ if (base_register_idx + i != element->register_index || !(effective_write_mask & element->mask)) ++ continue; ++ ++ if (element->sysval_semantic != VKD3D_SHADER_SV_NONE ++ && !vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE.", ++ element->sysval_semantic); ++ ++ if (sysval == ~0u) ++ { ++ sysval = element->sysval_semantic; ++ /* Line density and line detail can be arrayed together. */ ++ if (sysval == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) ++ sysval = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; ++ } ++ else ++ { ++ if (sysval != element->sysval_semantic) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Inconsistent sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE, " ++ "%#x was already seen.", ++ element->sysval_semantic, sysval); ++ } ++ ++ if (found) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.", ++ range->dst.write_mask); ++ ++ found = true; ++ ++ if (~effective_write_mask & element->mask) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid write mask %#x on a signature element touched by a " ++ "DCL_INDEX_RANGE instruction with effective write mask %#x.", ++ element->mask, effective_write_mask); ++ ++ if (first_component != vsir_write_mask_get_component_idx(element->mask)) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Signature element masks are not left-aligned within a DCL_INDEX_RANGE."); ++ } ++ } ++ ++ VKD3D_ASSERT(sysval != ~0u); ++} ++ ++static void vsir_validate_dcl_input(struct validation_context *ctx, ++ 
const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.dst.reg.type) ++ { ++ /* Signature input registers. */ ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_INCONTROLPOINT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ case VKD3DSPR_PATCHCONST: ++ /* Non-signature input registers. */ ++ case VKD3DSPR_PRIMID: ++ case VKD3DSPR_FORKINSTID: ++ case VKD3DSPR_JOININSTID: ++ case VKD3DSPR_THREADID: ++ case VKD3DSPR_THREADGROUPID: ++ case VKD3DSPR_LOCALTHREADID: ++ case VKD3DSPR_LOCALTHREADINDEX: ++ case VKD3DSPR_COVERAGE: ++ case VKD3DSPR_TESSCOORD: ++ case VKD3DSPR_OUTPOINTID: ++ case VKD3DSPR_GSINSTID: ++ case VKD3DSPR_WAVELANECOUNT: ++ case VKD3DSPR_WAVELANEINDEX: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT.", ++ instruction->declaration.dst.reg.type); ++ } ++} ++ + static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -7763,6 +8905,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + instruction->declaration.primitive_type.type); + } + ++static void vsir_validate_dcl_input_ps(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.dst.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_PS.", ++ instruction->declaration.dst.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction 
DCL_INPUT_PS_SGV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_ps_siv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_sgv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_SGV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_siv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_PATCHCONST: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_SIV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_output(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.dst.reg.type) ++ { ++ /* Signature output registers. */ ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_PATCHCONST: ++ /* Non-signature output registers. 
*/ ++ case VKD3DSPR_DEPTHOUT: ++ case VKD3DSPR_SAMPLEMASK: ++ case VKD3DSPR_DEPTHOUTGE: ++ case VKD3DSPR_DEPTHOUTLE: ++ case VKD3DSPR_OUTSTENCILREF: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_OUTPUT.", ++ instruction->declaration.dst.reg.type); ++ } ++} ++ + static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -7772,6 +9013,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte + instruction->declaration.count); + } + ++static void vsir_validate_dcl_output_siv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_PATCHCONST: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_OUTPUT_SIV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ + static void vsir_validate_dcl_output_topology(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -7801,6 +9058,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, + || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); ++ ++ if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", ++ instruction->declaration.tessellator_domain, ctx->program->tess_domain); + } + + static void 
vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, +@@ -8063,8 +9325,17 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ + [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, + [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, ++ [VKD3DSIH_DCL_INDEX_RANGE] = {0, 0, vsir_validate_dcl_index_range}, ++ [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, + [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, ++ [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, ++ [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv}, ++ [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv}, ++ [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv}, ++ [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv}, ++ [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output}, + [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, ++ [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv}, + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, + [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, + [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, +@@ -8177,6 +9448,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + .status = VKD3D_OK, + .phase = VKD3DSIH_INVALID, + .invalid_instruction_idx = true, ++ .outer_tess_idxs[0] = ~0u, ++ .outer_tess_idxs[1] = ~0u, ++ .outer_tess_idxs[2] = ~0u, ++ .outer_tess_idxs[3] = ~0u, ++ .inner_tess_idxs[0] = ~0u, ++ .inner_tess_idxs[1] = ~0u, + }; + unsigned int i; + +@@ -8187,12 +9464,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: ++ if 
(program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID ++ || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Invalid tessellation domain %#x.", program->tess_domain); + break; + + default: + if (program->patch_constant_signature.element_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Patch constant signature is only valid for hull and domain shaders."); ++ ++ if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Invalid tessellation domain %#x.", program->tess_domain); + } + + switch (program->shader_version.type) +@@ -8226,9 +9511,47 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + program->output_control_point_count); + } + +- vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); +- vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); +- vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); ++ vsir_validate_signature(&ctx, &program->input_signature, ++ &ctx.input_signature_data, SIGNATURE_TYPE_INPUT); ++ vsir_validate_signature(&ctx, &program->output_signature, ++ &ctx.output_signature_data, SIGNATURE_TYPE_OUTPUT); ++ vsir_validate_signature(&ctx, &program->patch_constant_signature, ++ &ctx.patch_constant_signature_data, SIGNATURE_TYPE_PATCH_CONSTANT); ++ ++ for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) ++ { ++ if (!bitmap_is_set(program->io_dcls, i)) ++ continue; ++ ++ switch (i) ++ { ++ /* Input registers */ ++ case VKD3DSPR_PRIMID: ++ case VKD3DSPR_FORKINSTID: ++ case VKD3DSPR_JOININSTID: ++ case VKD3DSPR_THREADID: ++ case VKD3DSPR_THREADGROUPID: ++ case VKD3DSPR_LOCALTHREADID: ++ case VKD3DSPR_LOCALTHREADINDEX: ++ case VKD3DSPR_COVERAGE: ++ case VKD3DSPR_TESSCOORD: ++ case VKD3DSPR_OUTPOINTID: ++ case 
VKD3DSPR_GSINSTID: ++ case VKD3DSPR_WAVELANECOUNT: ++ case VKD3DSPR_WAVELANEINDEX: ++ /* Output registers */ ++ case VKD3DSPR_DEPTHOUT: ++ case VKD3DSPR_SAMPLEMASK: ++ case VKD3DSPR_DEPTHOUTGE: ++ case VKD3DSPR_DEPTHOUTLE: ++ case VKD3DSPR_OUTSTENCILREF: ++ break; ++ ++ default: ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Invalid input/output declaration %u.", i); ++ } ++ } + + if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) + goto fail; +@@ -8318,6 +9641,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin + if (program->shader_version.major <= 2) + vsir_transform(&ctx, vsir_program_add_diffuse_output); + ++ /* For vsir_program_insert_fragment_fog(). */ ++ vsir_transform(&ctx, vsir_program_add_fog_input); ++ ++ /* For vsir_program_insert_vertex_fog(). */ ++ vsir_transform(&ctx, vsir_program_add_fog_output); ++ + return ctx.result; + } + +@@ -8372,6 +9701,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t + vsir_transform(&ctx, vsir_program_insert_point_size); + vsir_transform(&ctx, vsir_program_insert_point_size_clamp); + vsir_transform(&ctx, vsir_program_insert_point_coord); ++ vsir_transform(&ctx, vsir_program_insert_fragment_fog); ++ vsir_transform(&ctx, vsir_program_insert_vertex_fog); + + if (TRACE_ON()) + vsir_program_trace(program); +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index df3edeaa4e6..bb85e62e94c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -41,6 +41,8 @@ struct msl_generator + const char *prefix; + bool failed; + ++ bool write_depth; ++ + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; + }; +@@ -153,6 +155,72 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + ++ 
case VKD3DSPR_DEPTHOUT: ++ if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled depth output in shader type #%x.", ++ gen->program->shader_version.type); ++ vkd3d_string_buffer_printf(buffer, "o_depth"); ++ break; ++ ++ case VKD3DSPR_IMMCONST: ++ switch (reg->dimension) ++ { ++ case VSIR_DIMENSION_SCALAR: ++ switch (reg->data_type) ++ { ++ case VKD3D_DATA_INT: ++ vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); ++ break; ++ case VKD3D_DATA_UINT: ++ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); ++ break; ++ case VKD3D_DATA_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "as_type(%#xu)", reg->u.immconst_u32[0]); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "", reg->data_type); ++ break; ++ } ++ break; ++ ++ case VSIR_DIMENSION_VEC4: ++ switch (reg->data_type) ++ { ++ case VKD3D_DATA_INT: ++ vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", ++ reg->u.immconst_u32[0], reg->u.immconst_u32[1], ++ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); ++ break; ++ case VKD3D_DATA_UINT: ++ vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", ++ reg->u.immconst_u32[0], reg->u.immconst_u32[1], ++ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); ++ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); ++ break; ++ case VKD3D_DATA_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "as_type(uint4(%#xu, %#xu, %#xu, %#xu))", ++ reg->u.immconst_u32[0], reg->u.immconst_u32[1], ++ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "", 
reg->data_type); ++ break; ++ } ++ break; ++ ++ default: ++ vkd3d_string_buffer_printf(buffer, "", reg->dimension); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled dimension %#x.", reg->dimension); ++ break; ++ } ++ break; ++ + case VKD3DSPR_CONSTBUFFER: + if (reg->idx_count != 3) + { +@@ -215,19 +283,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) + { + const struct vkd3d_shader_register *reg = &vsir_src->reg; ++ struct vkd3d_string_buffer *str; + + msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + + if (reg->non_uniform) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); +- if (vsir_src->modifiers) +- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +- "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + +- msl_print_register_name(msl_src->str, gen, reg); ++ if (!vsir_src->modifiers) ++ str = msl_src->str; ++ else ++ str = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ msl_print_register_name(str, gen, reg); + if (reg->dimension == VSIR_DIMENSION_VEC4) +- msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); ++ msl_print_swizzle(str, vsir_src->swizzle, mask); ++ ++ switch (vsir_src->modifiers) ++ { ++ case VKD3DSPSM_NONE: ++ break; ++ case VKD3DSPSM_NEG: ++ vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer); ++ break; ++ case VKD3DSPSM_ABS: ++ vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer); ++ break; ++ default: ++ vkd3d_string_buffer_printf(msl_src->str, "(%s)", ++ vsir_src->modifiers, str->buffer); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); ++ break; ++ } ++ ++ if (str != msl_src->str) ++ vkd3d_string_buffer_release(&gen->string_buffers, str); + } + 
+ static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) +@@ -253,7 +345,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, + msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); +- msl_print_write_mask(msl_dst->mask, write_mask); ++ if (vsir_dst->reg.dimension == VSIR_DIMENSION_VEC4) ++ msl_print_write_mask(msl_dst->mask, write_mask); + + return write_mask; + } +@@ -261,22 +354,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, + static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( + struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) + { ++ uint32_t modifiers = dst->vsir->modifiers; + va_list args; + + if (dst->vsir->shift) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); +- if (dst->vsir->modifiers) ++ if (modifiers & ~VKD3DSPDM_SATURATE) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +- "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); ++ "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers); + + msl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + ++ if (modifiers & VKD3DSPDM_SATURATE) ++ vkd3d_string_buffer_printf(gen->buffer, "saturate("); ++ + va_start(args, format); + vkd3d_string_buffer_vprintf(gen->buffer, format, args); + va_end(args); + ++ if (modifiers & VKD3DSPDM_SATURATE) ++ vkd3d_string_buffer_printf(gen->buffer, ")"); ++ + vkd3d_string_buffer_printf(gen->buffer, ";\n"); + } + +@@ -288,6 +388,164 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i + "Internal compiler error: Unhandled instruction %#x.", ins->opcode); + } + ++static void 
msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ struct msl_src src[2]; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], mask); ++ msl_src_init(&src[1], gen, &ins->src[1], mask); ++ ++ msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); ++ ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, uint32_t src_mask) ++{ ++ unsigned int component_count; ++ struct msl_src src[2]; ++ struct msl_dst dst; ++ uint32_t dst_mask; ++ ++ dst_mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], src_mask); ++ msl_src_init(&src[1], gen, &ins->src[1], src_mask); ++ ++ if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1) ++ msl_print_assignment(gen, &dst, "float%u(dot(%s, %s))", ++ component_count, src[0].str->buffer, src[1].str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "dot(%s, %s)", src[0].str->buffer, src[1].str->buffer); ++ ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ struct vkd3d_string_buffer *args; ++ struct msl_src src; ++ struct msl_dst dst; ++ unsigned int i; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ args = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ for (i = 0; i < ins->src_count; ++i) ++ { ++ msl_src_init(&src, gen, &ins->src[i], mask); ++ vkd3d_string_buffer_printf(args, "%s%s", i ? 
", " : "", src.str->buffer); ++ msl_src_cleanup(&src, &gen->string_buffers); ++ } ++ ++ msl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); ++ ++ vkd3d_string_buffer_release(&gen->string_buffers, args); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ unsigned int mask_size; ++ struct msl_src src[2]; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], mask); ++ msl_src_init(&src[1], gen, &ins->src[1], mask); ++ ++ if ((mask_size = vsir_write_mask_component_count(mask)) > 1) ++ msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))", ++ mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", ++ src[0].str->buffer, op, src[1].str->buffer); ++ ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) ++{ ++ unsigned int component_count; ++ struct msl_src src; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src, gen, &ins->src[0], mask); ++ ++ if ((component_count = vsir_write_mask_component_count(mask)) > 1) ++ msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); ++ ++ msl_src_cleanup(&src, &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_end_block(struct msl_generator *gen) ++{ ++ --gen->indent; ++ msl_print_indent(gen->buffer, gen->indent); ++ 
vkd3d_string_buffer_printf(gen->buffer, "}\n"); ++} ++ ++static void msl_begin_block(struct msl_generator *gen) ++{ ++ msl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "{\n"); ++ ++gen->indent; ++} ++ ++static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ const char *condition; ++ struct msl_src src; ++ ++ msl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); ++ ++ msl_print_indent(gen->buffer, gen->indent); ++ condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? "bool" : "!bool"; ++ vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); ++ ++ msl_src_cleanup(&src, &gen->string_buffers); ++ ++ msl_begin_block(gen); ++} ++ ++static void msl_else(struct msl_generator *gen) ++{ ++ msl_end_block(gen); ++ msl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "else\n"); ++ msl_begin_block(gen); ++} ++ ++static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ struct msl_src src; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src, gen, &ins->src[0], mask); ++ ++ msl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer); ++ ++ msl_src_cleanup(&src, &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ + static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + struct msl_src src; +@@ -303,6 +561,31 @@ static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc + msl_dst_cleanup(&dst, &gen->string_buffers); + } + ++static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ unsigned int component_count; ++ struct msl_src src[3]; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], 
mask); ++ msl_src_init(&src[1], gen, &ins->src[1], mask); ++ msl_src_init(&src[2], gen, &ins->src[2], mask); ++ ++ if ((component_count = vsir_write_mask_component_count(mask)) > 1) ++ msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))", ++ src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))", ++ src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); ++ ++ msl_src_cleanup(&src[2], &gen->string_buffers); ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ + static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + msl_print_indent(gen->buffer, gen->indent); +@@ -315,17 +598,119 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d + + switch (ins->opcode) + { +- case VKD3DSIH_DCL_INPUT: +- case VKD3DSIH_DCL_OUTPUT: +- case VKD3DSIH_DCL_OUTPUT_SIV: ++ case VKD3DSIH_ADD: ++ msl_binop(gen, ins, "+"); ++ break; ++ case VKD3DSIH_AND: ++ msl_binop(gen, ins, "&"); ++ break; + case VKD3DSIH_NOP: + break; ++ case VKD3DSIH_DIV: ++ msl_binop(gen, ins, "/"); ++ break; ++ case VKD3DSIH_DP2: ++ msl_dot(gen, ins, vkd3d_write_mask_from_component_count(2)); ++ break; ++ case VKD3DSIH_DP3: ++ msl_dot(gen, ins, vkd3d_write_mask_from_component_count(3)); ++ break; ++ case VKD3DSIH_DP4: ++ msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); ++ break; ++ case VKD3DSIH_ELSE: ++ msl_else(gen); ++ break; ++ case VKD3DSIH_ENDIF: ++ msl_end_block(gen); ++ break; ++ case VKD3DSIH_IEQ: ++ msl_relop(gen, ins, "=="); ++ break; ++ case VKD3DSIH_EXP: ++ msl_intrinsic(gen, ins, "exp2"); ++ break; ++ case VKD3DSIH_FRC: ++ msl_intrinsic(gen, ins, "fract"); ++ break; ++ case VKD3DSIH_FTOI: ++ msl_cast(gen, ins, "int"); ++ break; ++ case VKD3DSIH_FTOU: ++ msl_cast(gen, ins, "uint"); ++ break; ++ case VKD3DSIH_GEO: ++ msl_relop(gen, 
ins, ">="); ++ break; ++ case VKD3DSIH_IF: ++ msl_if(gen, ins); ++ break; ++ case VKD3DSIH_ISHL: ++ msl_binop(gen, ins, "<<"); ++ break; ++ case VKD3DSIH_ISHR: ++ case VKD3DSIH_USHR: ++ msl_binop(gen, ins, ">>"); ++ break; ++ case VKD3DSIH_LTO: ++ msl_relop(gen, ins, "<"); ++ break; ++ case VKD3DSIH_MAD: ++ msl_intrinsic(gen, ins, "fma"); ++ break; ++ case VKD3DSIH_MAX: ++ msl_intrinsic(gen, ins, "max"); ++ break; ++ case VKD3DSIH_MIN: ++ msl_intrinsic(gen, ins, "min"); ++ break; ++ case VKD3DSIH_INE: ++ case VKD3DSIH_NEU: ++ msl_relop(gen, ins, "!="); ++ break; ++ case VKD3DSIH_ITOF: ++ case VKD3DSIH_UTOF: ++ msl_cast(gen, ins, "float"); ++ break; ++ case VKD3DSIH_LOG: ++ msl_intrinsic(gen, ins, "log2"); ++ break; + case VKD3DSIH_MOV: + msl_mov(gen, ins); + break; ++ case VKD3DSIH_MOVC: ++ msl_movc(gen, ins); ++ break; ++ case VKD3DSIH_MUL: ++ msl_binop(gen, ins, "*"); ++ break; ++ case VKD3DSIH_NOT: ++ msl_unary_op(gen, ins, "~"); ++ break; ++ case VKD3DSIH_OR: ++ msl_binop(gen, ins, "|"); ++ break; + case VKD3DSIH_RET: + msl_ret(gen, ins); + break; ++ case VKD3DSIH_ROUND_NE: ++ msl_intrinsic(gen, ins, "rint"); ++ break; ++ case VKD3DSIH_ROUND_NI: ++ msl_intrinsic(gen, ins, "floor"); ++ break; ++ case VKD3DSIH_ROUND_PI: ++ msl_intrinsic(gen, ins, "ceil"); ++ break; ++ case VKD3DSIH_ROUND_Z: ++ msl_intrinsic(gen, ins, "trunc"); ++ break; ++ case VKD3DSIH_RSQ: ++ msl_intrinsic(gen, ins, "rsqrt"); ++ break; ++ case VKD3DSIH_SQRT: ++ msl_intrinsic(gen, ins, "sqrt"); ++ break; + default: + msl_unhandled(gen, ins); + break; +@@ -489,6 +874,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) + + if (e->sysval_semantic) + { ++ if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) ++ { ++ if (type != VKD3D_SHADER_TYPE_PIXEL) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", type); ++ ++ msl_print_indent(gen->buffer, 1); ++ 
vkd3d_string_buffer_printf(buffer, "bool is_front_face [[front_facing]];\n"); ++ continue; ++ } + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); + continue; +@@ -501,13 +896,6 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) + continue; + } + +- if (e->interpolation_mode != VKD3DSIM_NONE) +- { +- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +- "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); +- continue; +- } +- + if(e->register_count > 1) + { + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +@@ -551,6 +939,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) + break; + } + ++ switch (e->interpolation_mode) ++ { ++ /* The default interpolation attribute. */ ++ case VKD3DSIM_LINEAR: ++ case VKD3DSIM_NONE: ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); ++ break; ++ } ++ + vkd3d_string_buffer_printf(buffer, ";\n"); + } + +@@ -602,6 +1002,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) + { + e = &signature->elements[i]; + ++ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) ++ { ++ gen->write_depth = true; ++ msl_print_indent(gen->buffer, 1); ++ vkd3d_string_buffer_printf(buffer, "float shader_out_depth [[depth(any)]];\n"); ++ continue; ++ } ++ + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + +@@ -690,6 +1098,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen) + vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); + msl_print_write_mask(buffer, e->mask); + } ++ else if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) ++ { ++ vkd3d_string_buffer_printf(buffer, ".u = uint4(input.is_front_face ? 
0xffffffffu : 0u, 0, 0, 0)"); ++ } + else + { + vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); +@@ -711,6 +1123,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) + { + e = &signature->elements[i]; + ++ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) ++ { ++ vkd3d_string_buffer_printf(buffer, " output.shader_out_depth = shader_out_depth;\n"); ++ continue; ++ } ++ + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + +@@ -770,9 +1188,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); + ++ if (gen->write_depth) ++ vkd3d_string_buffer_printf(gen->buffer, " float shader_out_depth;\n"); ++ + msl_generate_entrypoint_prologue(gen); + + vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); ++ if (gen->write_depth) ++ vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); + if (gen->descriptor_info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); + vkd3d_string_buffer_printf(gen->buffer, ");\n"); +@@ -790,6 +1213,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader + MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); ++ vkd3d_string_buffer_printf(gen->buffer, "#include \n\n"); ++ vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); + + if (gen->program->global_flags) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +@@ -808,6 +1233,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader + "void %s_main(thread vkd3d_vec4 *v, " + "thread vkd3d_vec4 *o", + gen->prefix); ++ if (gen->write_depth) ++ vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); + if (gen->descriptor_info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); + vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); +@@ -887,7 +1314,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + +- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + + if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) + return ret; +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index 4a8d0fddae1..d167415c356 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -20,6 +20,7 @@ + + %{ + ++#include "preproc.h" + #include "preproc.tab.h" + + #undef ERROR /* defined in wingdi.h */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index bdfd632ad12..a7b935543a0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -34,6 +34,32 @@ + # include "vulkan/GLSL.std.450.h" + #endif /* HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H */ + ++#define VKD3D_SPIRV_VERSION_1_0 0x00010000 ++#define VKD3D_SPIRV_VERSION_1_3 0x00010300 
++#define VKD3D_SPIRV_GENERATOR_ID 18 ++#define VKD3D_SPIRV_GENERATOR_VERSION 14 ++#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) ++#ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER ++# define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0 ++#endif ++ ++#define VKD3D_SPIRV_HEADER_SIZE 5 ++ ++#define VKD3D_SPIRV_VERSION_MAJOR_SHIFT 16u ++#define VKD3D_SPIRV_VERSION_MAJOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MAJOR_SHIFT) ++#define VKD3D_SPIRV_VERSION_MINOR_SHIFT 8u ++#define VKD3D_SPIRV_VERSION_MINOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MINOR_SHIFT) ++ ++#define VKD3D_SPIRV_GENERATOR_ID_SHIFT 16u ++#define VKD3D_SPIRV_GENERATOR_ID_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_ID_SHIFT) ++#define VKD3D_SPIRV_GENERATOR_VERSION_SHIFT 0u ++#define VKD3D_SPIRV_GENERATOR_VERSION_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_VERSION_SHIFT) ++ ++#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT 16u ++#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT) ++#define VKD3D_SPIRV_INSTRUCTION_OP_SHIFT 0u ++#define VKD3D_SPIRV_INSTRUCTION_OP_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_OP_SHIFT) ++ + #ifdef HAVE_SPIRV_TOOLS + # include "spirv-tools/libspirv.h" + +@@ -82,7 +108,7 @@ static uint32_t get_binary_to_text_options(enum vkd3d_shader_compile_option_form + return out; + } + +-static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, ++static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment, + enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) + { +@@ -143,20 +169,6 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co + return result; + } + +-static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, +- enum vkd3d_shader_spirv_environment environment) +-{ +- static const enum 
vkd3d_shader_compile_option_formatting_flags formatting +- = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; +- struct vkd3d_shader_code text; +- +- if (!vkd3d_spirv_binary_to_text(spirv, environment, formatting, &text)) +- { +- vkd3d_shader_trace_text(text.code, text.size); +- vkd3d_shader_free_shader_code(&text); +- } +-} +- + static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment) + { +@@ -180,14 +192,13 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc + + #else + +-static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, ++static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment, + enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) + { + return VKD3D_ERROR; + } +-static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, +- enum vkd3d_shader_spirv_environment environment) {} ++ + static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, + enum vkd3d_shader_spirv_environment environment) + { +@@ -196,6 +207,312 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc + + #endif /* HAVE_SPIRV_TOOLS */ + ++struct spirv_colours ++{ ++ const char *reset; ++ const char *comment; ++}; ++ ++struct spirv_parser ++{ ++ struct vkd3d_string_buffer_cache string_buffers; ++ struct vkd3d_shader_location location; ++ struct vkd3d_shader_message_context *message_context; ++ enum vkd3d_shader_compile_option_formatting_flags formatting; ++ struct spirv_colours colours; ++ bool failed; ++ ++ const uint32_t *code; ++ size_t pos; ++ size_t size; ++ ++ struct vkd3d_string_buffer *text; ++}; ++ ++static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_error(struct 
spirv_parser *parser, ++ enum vkd3d_shader_error error, const char *format, ...) ++{ ++ va_list args; ++ ++ va_start(args, format); ++ vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); ++ va_end(args); ++ parser->failed = true; ++} ++ ++static uint32_t spirv_parser_read_u32(struct spirv_parser *parser) ++{ ++ if (parser->pos >= parser->size) ++ { ++ parser->failed = true; ++ return 0; ++ } ++ ++ return parser->code[parser->pos++]; ++} ++ ++static void VKD3D_PRINTF_FUNC(2, 3) spirv_parser_print_comment(struct spirv_parser *parser, const char *format, ...) ++{ ++ va_list args; ++ ++ if (!parser->text) ++ return; ++ ++ va_start(args, format); ++ vkd3d_string_buffer_printf(parser->text, "%s; ", parser->colours.comment); ++ vkd3d_string_buffer_vprintf(parser->text, format, args); ++ vkd3d_string_buffer_printf(parser->text, "%s\n", parser->colours.reset); ++ va_end(args); ++} ++ ++static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t magic) ++{ ++ unsigned int id, version; ++ const char *name; ++ ++ id = (magic & VKD3D_SPIRV_GENERATOR_ID_MASK) >> VKD3D_SPIRV_GENERATOR_ID_SHIFT; ++ version = (magic & VKD3D_SPIRV_GENERATOR_VERSION_MASK) >> VKD3D_SPIRV_GENERATOR_VERSION_SHIFT; ++ ++ switch (id) ++ { ++ case VKD3D_SPIRV_GENERATOR_ID: ++ name = "Wine VKD3D Shader Compiler"; ++ break; ++ ++ default: ++ name = NULL; ++ break; ++ } ++ ++ if (name) ++ spirv_parser_print_comment(parser, "Generator: %s; %u", name, version); ++ else ++ spirv_parser_print_comment(parser, "Generator: Unknown (%#x); %u", id, version); ++} ++ ++static enum vkd3d_result spirv_parser_read_header(struct spirv_parser *parser) ++{ ++ uint32_t magic, version, generator, bound, schema; ++ unsigned int major, minor; ++ ++ if (parser->pos > parser->size || parser->size - parser->pos < VKD3D_SPIRV_HEADER_SIZE) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, ++ "Unexpected end while reading the SPIR-V header."); ++ return 
VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ magic = spirv_parser_read_u32(parser); ++ version = spirv_parser_read_u32(parser); ++ generator = spirv_parser_read_u32(parser); ++ bound = spirv_parser_read_u32(parser); ++ schema = spirv_parser_read_u32(parser); ++ ++ if (magic != SpvMagicNumber) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, ++ "Invalid magic number %#08x.", magic); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (version & ~(VKD3D_SPIRV_VERSION_MAJOR_MASK | VKD3D_SPIRV_VERSION_MINOR_MASK)) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, ++ "Invalid version token %#08x.", version); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ major = (version & VKD3D_SPIRV_VERSION_MAJOR_MASK) >> VKD3D_SPIRV_VERSION_MAJOR_SHIFT; ++ minor = (version & VKD3D_SPIRV_VERSION_MINOR_MASK) >> VKD3D_SPIRV_VERSION_MINOR_SHIFT; ++ if (major != 1 || minor > 0) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "Unable to parse SPIR-V version %u.%u.", major, minor); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ if (!bound) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, ++ "Invalid zero id bound."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ if (schema) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "Unable to handle instruction schema %#08x.", schema); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ if (parser->formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER) ++ { ++ spirv_parser_print_comment(parser, "SPIR-V"); ++ spirv_parser_print_comment(parser, "Version: %u.%u", major, minor); ++ spirv_parser_print_generator(parser, generator); ++ spirv_parser_print_comment(parser, "Bound: %u", bound); ++ spirv_parser_print_comment(parser, "Schema: %u", schema); ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result spirv_parser_parse_instruction(struct spirv_parser *parser) ++{ ++ struct vkd3d_string_buffer *buffer; ++ 
uint16_t op, count; ++ unsigned int i; ++ uint32_t word; ++ ++ word = spirv_parser_read_u32(parser); ++ count = (word & VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK) >> VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT; ++ op = (word & VKD3D_SPIRV_INSTRUCTION_OP_MASK) >> VKD3D_SPIRV_INSTRUCTION_OP_SHIFT; ++ ++ if (!count) ++ { ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, ++ "Invalid word count %u.", count); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ --count; ++ buffer = vkd3d_string_buffer_get(&parser->string_buffers); ++ for (i = 0; i < count; ++i) ++ { ++ word = spirv_parser_read_u32(parser); ++ vkd3d_string_buffer_printf(buffer, " 0x%08x", word); ++ } ++ spirv_parser_print_comment(parser, "%s", op, buffer->buffer); ++ vkd3d_string_buffer_release(&parser->string_buffers, buffer); ++ ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "Unrecognised instruction %#x.", op); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result spirv_parser_parse(struct spirv_parser *parser, struct vkd3d_shader_code *text) ++{ ++ enum vkd3d_result ret; ++ ++ if (text) ++ parser->text = vkd3d_string_buffer_get(&parser->string_buffers); ++ ++ if ((ret = spirv_parser_read_header(parser)) < 0) ++ goto fail; ++ while (parser->pos < parser->size) ++ { ++ ++parser->location.line; ++ if ((ret = spirv_parser_parse_instruction(parser)) < 0) ++ goto fail; ++ } ++ ++ if (parser->failed) ++ { ++ ret = VKD3D_ERROR_INVALID_SHADER; ++ goto fail; ++ } ++ ++ if (text) ++ vkd3d_shader_code_from_string_buffer(text, parser->text); ++ ++ return VKD3D_OK; ++ ++fail: ++ if (parser->text) ++ { ++ if (TRACE_ON()) ++ vkd3d_string_buffer_trace(parser->text); ++ vkd3d_string_buffer_release(&parser->string_buffers, parser->text); ++ } ++ return ret; ++} ++ ++static void spirv_parser_cleanup(struct spirv_parser *parser) ++{ ++ vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); ++} ++ ++static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const 
struct vkd3d_shader_code *source, ++ const char *source_name, enum vkd3d_shader_compile_option_formatting_flags formatting, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ static const struct spirv_colours no_colours = ++ { ++ .reset = "", ++ .comment = "", ++ }; ++ static const struct spirv_colours colours = ++ { ++ .reset = "\x1b[m", ++ .comment = "\x1b[36m", ++ }; ++ ++ memset(parser, 0, sizeof(*parser)); ++ parser->location.source_name = source_name; ++ parser->message_context = message_context; ++ vkd3d_string_buffer_cache_init(&parser->string_buffers); ++ ++ if (source->size % 4) ++ { ++ vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); ++ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, ++ "Shader size %zu is not a multiple of four.", source->size); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ ++ parser->formatting = formatting; ++ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR) ++ parser->colours = colours; ++ else ++ parser->colours = no_colours; ++ parser->code = source->code; ++ parser->size = source->size / 4; ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, ++ const char *source_name, enum vkd3d_shader_spirv_environment environment, ++ enum vkd3d_shader_compile_option_formatting_flags formatting, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) ++{ ++ struct spirv_parser parser; ++ enum vkd3d_result ret; ++ ++ if (!VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) ++ return spirv_tools_binary_to_text(spirv, environment, formatting, out); ++ ++ MESSAGE("Creating a SPIR-V parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); ++ ++ if ((ret = spirv_parser_init(&parser, spirv, source_name, formatting, message_context)) < 0) ++ return ret; ++ ++ ret = spirv_parser_parse(&parser, out); ++ ++ spirv_parser_cleanup(&parser); ++ ++ return ret; ++} ++ ++static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) ++{ ++ static const enum vkd3d_shader_compile_option_formatting_flags formatting ++ = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; ++ struct vkd3d_shader_message_context message_context; ++ struct vkd3d_shader_code text; ++ ++ vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); ++ ++ if (!vkd3d_spirv_binary_to_text(spirv, NULL, environment, formatting, &text, &message_context)) ++ { ++ vkd3d_shader_trace_text(text.code, text.size); ++ vkd3d_shader_free_shader_code(&text); ++ } ++ ++ vkd3d_shader_message_context_cleanup(&message_context); ++} ++ + enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, + unsigned int index) + { +@@ -247,12 +564,6 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d + } + } + +-#define VKD3D_SPIRV_VERSION_1_0 0x00010000 +-#define VKD3D_SPIRV_VERSION_1_3 0x00010300 +-#define VKD3D_SPIRV_GENERATOR_ID 18 +-#define VKD3D_SPIRV_GENERATOR_VERSION 14 +-#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) +- + struct vkd3d_spirv_stream + { + uint32_t *words; +@@ -2406,6 +2717,7 @@ struct vkd3d_hull_shader_variables + struct ssa_register_info + { + enum vkd3d_data_type data_type; ++ uint8_t write_mask; + uint32_t id; + }; + +@@ -2471,7 +2783,6 @@ struct spirv_compiler + bool emit_point_size; + + enum vkd3d_shader_opcode phase; +- bool emit_default_control_point_phase; + struct vkd3d_shader_phase control_point_phase; + struct 
vkd3d_shader_phase patch_constant_phase; + +@@ -3316,13 +3627,19 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, + static const struct vkd3d_spec_constant_info + { + enum vkd3d_shader_parameter_name name; +- uint32_t default_value; ++ union ++ { ++ uint32_t u; ++ float f; ++ } default_value; + const char *debug_name; + } + vkd3d_shader_parameters[] = + { +- {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, +- {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, ++ {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, {.u = 1}, "sample_count"}, ++ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, {.f = 0.0f}, "alpha_test_ref"}, ++ {VKD3D_SHADER_PARAMETER_NAME_FOG_END, {.f = 1.0f}, "fog_end"}, ++ {VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, {.f = 1.0f}, "fog_scale"}, + }; + + static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) +@@ -3383,7 +3700,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile + const struct vkd3d_spec_constant_info *info; + + info = get_spec_constant_info(name); +- default_value = info ? info->default_value : 0; ++ default_value = info ? info->default_value.u : 0; + + scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); +@@ -3574,6 +3891,24 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, + register_info->is_aggregate = false; + return true; + } ++ else if (reg->type == VKD3DSPR_SSA) ++ { ++ const struct ssa_register_info *ssa = &compiler->ssa_register_info[reg->idx[0].offset]; ++ ++ if (!ssa->id) ++ { ++ /* Should only be from a missing instruction implementation. 
*/ ++ VKD3D_ASSERT(compiler->failed); ++ return 0; ++ } ++ ++ memset(register_info, 0, sizeof(*register_info)); ++ register_info->id = ssa->id; ++ register_info->storage_class = SpvStorageClassMax; ++ register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type); ++ register_info->write_mask = ssa->write_mask; ++ return true; ++ } + + vkd3d_symbol_make_register(®_symbol, reg); + if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) +@@ -4181,67 +4516,14 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil + return const_id; + } + +-static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg) +-{ +- VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); +- VKD3D_ASSERT(reg->idx_count == 1); +- return &compiler->ssa_register_info[reg->idx[0].offset]; +-} +- + static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg, uint32_t val_id) ++ const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) + { + unsigned int i = reg->idx[0].offset; + VKD3D_ASSERT(i < compiler->ssa_register_count); + compiler->ssa_register_info[i].data_type = reg->data_type; + compiler->ssa_register_info[i].id = val_id; +-} +- +-static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type, +- uint32_t swizzle) +-{ +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- enum vkd3d_shader_component_type reg_component_type; +- const struct ssa_register_info *ssa; +- unsigned int component_idx; +- uint32_t type_id, val_id; +- +- ssa = spirv_compiler_get_ssa_register_info(compiler, reg); +- val_id = ssa->id; +- if (!val_id) +- { +- /* Should only be from a missing instruction implementation. 
*/ +- VKD3D_ASSERT(compiler->failed); +- return 0; +- } +- VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); +- +- reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); +- +- if (reg->dimension == VSIR_DIMENSION_SCALAR) +- { +- if (component_type != reg_component_type) +- { +- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); +- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); +- } +- +- return val_id; +- } +- +- if (component_type != reg_component_type) +- { +- /* Required for resource loads with sampled type int, because DXIL has no signedness. +- * Only 128-bit vector sizes are used. */ +- type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); +- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); +- } +- +- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); +- component_idx = vsir_swizzle_get_component(swizzle, 0); +- return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); ++ compiler->ssa_register_info[i].write_mask = write_mask; + } + + static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -4267,9 +4549,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + component_count = vsir_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); + +- if (reg->type == VKD3DSPR_SSA) +- return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle); +- + if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); +@@ -4294,9 +4573,9 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + type_id = vkd3d_spirv_get_type_id(builder, + reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask)); + val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); ++ 
swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; + } + +- swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; + val_id = spirv_compiler_emit_swizzle(compiler, + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); + +@@ -4497,7 +4776,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, + + if (reg->type == VKD3DSPR_SSA) + { +- spirv_compiler_set_ssa_register_info(compiler, reg, val_id); ++ spirv_compiler_set_ssa_register_info(compiler, reg, write_mask, val_id); + return; + } + +@@ -4883,35 +5162,36 @@ static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = + static const struct + { + enum vkd3d_shader_register_type reg_type; ++ SpvStorageClass storage_class; + struct vkd3d_spirv_builtin builtin; + } + vkd3d_register_builtins[] = + { +- {VKD3DSPR_THREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, +- {VKD3DSPR_LOCALTHREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, +- {VKD3DSPR_LOCALTHREADINDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, +- {VKD3DSPR_THREADGROUPID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, ++ {VKD3DSPR_THREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, ++ {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, ++ {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, ++ {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, + +- {VKD3DSPR_GSINSTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, +- {VKD3DSPR_OUTPOINTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, ++ {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, ++ {VKD3DSPR_OUTPOINTID, 
SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, + +- {VKD3DSPR_PRIMID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, ++ {VKD3DSPR_PRIMID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, + +- {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, ++ {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + +- {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, ++ {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, + +- {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, +- {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, ++ {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, ++ {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + +- {VKD3DSPR_DEPTHOUT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, +- {VKD3DSPR_DEPTHOUTGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, +- {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, ++ {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, ++ {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, ++ {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, + +- {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, ++ {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, + +- {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, +- {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, 
SpvBuiltInSubgroupLocalInvocationId}}, ++ {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, ++ {VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, + }; + + static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, +@@ -4970,14 +5250,18 @@ static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_sysval( + } + + static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_register( +- enum vkd3d_shader_register_type reg_type) ++ enum vkd3d_shader_register_type reg_type, SpvStorageClass *storage_class) + { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(vkd3d_register_builtins); ++i) + { + if (vkd3d_register_builtins[i].reg_type == reg_type) ++ { ++ if (storage_class) ++ *storage_class = vkd3d_register_builtins[i].storage_class; + return &vkd3d_register_builtins[i].builtin; ++ } + } + + return NULL; +@@ -4990,7 +5274,7 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp + + if ((builtin = get_spirv_builtin_for_sysval(compiler, sysval))) + return builtin; +- if ((builtin = get_spirv_builtin_for_register(reg_type))) ++ if ((builtin = get_spirv_builtin_for_register(reg_type, NULL))) + return builtin; + + if ((sysval != VKD3D_SHADER_SV_NONE && sysval != VKD3D_SHADER_SV_TARGET) +@@ -5322,21 +5606,26 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + return input_id; + } + +-static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, ++static void spirv_compiler_emit_io_register(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_register *reg = &dst->reg; + const struct vkd3d_spirv_builtin *builtin; + struct vkd3d_symbol reg_symbol; ++ SpvStorageClass storage_class; ++ uint32_t write_mask, id; + struct rb_entry 
*entry; +- uint32_t write_mask; +- uint32_t input_id; + + VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); + VKD3D_ASSERT(reg->idx_count < 2); + +- if (!(builtin = get_spirv_builtin_for_register(reg->type))) ++ if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) ++ { ++ builtin = &vkd3d_output_point_size_builtin; ++ storage_class = SpvStorageClassOutput; ++ } ++ else if (!(builtin = get_spirv_builtin_for_register(reg->type, &storage_class))) + { + FIXME("Unhandled register %#x.\n", reg->type); + return; +@@ -5347,14 +5636,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, + if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) + return; + +- input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassInput, 0); ++ id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, 0); + + write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); +- vkd3d_symbol_set_register_info(®_symbol, input_id, +- SpvStorageClassInput, builtin->component_type, write_mask); ++ vkd3d_symbol_set_register_info(®_symbol, id, ++ storage_class, builtin->component_type, write_mask); + reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; + spirv_compiler_put_symbol(compiler, ®_symbol); +- spirv_compiler_emit_register_debug_name(builder, input_id, reg); ++ spirv_compiler_emit_register_execution_mode(compiler, reg->type); ++ spirv_compiler_emit_register_debug_name(builder, id, reg); + } + + static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, +@@ -5458,41 +5748,6 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * + } + } + +-static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +- const struct vkd3d_shader_dst_param *dst) +-{ +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- const struct vkd3d_shader_register *reg = &dst->reg; +- const struct 
vkd3d_spirv_builtin *builtin; +- struct vkd3d_symbol reg_symbol; +- uint32_t write_mask; +- uint32_t output_id; +- +- VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); +- VKD3D_ASSERT(reg->idx_count < 2); +- +- if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) +- { +- builtin = &vkd3d_output_point_size_builtin; +- } +- else if (!(builtin = get_spirv_builtin_for_register(reg->type))) +- { +- FIXME("Unhandled register %#x.\n", reg->type); +- return; +- } +- +- output_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); +- +- vkd3d_symbol_make_register(®_symbol, reg); +- write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); +- vkd3d_symbol_set_register_info(®_symbol, output_id, +- SpvStorageClassOutput, builtin->component_type, write_mask); +- reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; +- spirv_compiler_put_symbol(compiler, ®_symbol); +- spirv_compiler_emit_register_execution_mode(compiler, reg->type); +- spirv_compiler_emit_register_debug_name(builder, output_id, reg); +-} +- + static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, + const struct vkd3d_spirv_builtin *builtin, const unsigned int *array_sizes, unsigned int size_count) + { +@@ -5857,16 +6112,6 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * + compiler->epilogue_function_id = 0; + } + +-static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *compiler) +-{ +- struct vkd3d_shader_dst_param dst; +- +- memset(&dst, 0, sizeof(dst)); +- vsir_register_init(&dst.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); +- dst.write_mask = VKD3DSP_WRITEMASK_0; +- spirv_compiler_emit_input_register(compiler, &dst); +-} +- + static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) + { + const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; +@@ -5879,7 
+6124,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp + break; + case VKD3D_SHADER_TYPE_HULL: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); +- spirv_compiler_emit_hull_shader_builtins(compiler); + break; + case VKD3D_SHADER_TYPE_DOMAIN: + vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); +@@ -6699,27 +6943,6 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi + tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); + } + +-static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; +- +- /* INPUT and PATCHCONST are handled in spirv_compiler_emit_io_declarations(). +- * OUTPOINTID is handled in spirv_compiler_emit_hull_shader_builtins(). */ +- if (dst->reg.type != VKD3DSPR_INPUT && dst->reg.type != VKD3DSPR_PATCHCONST +- && dst->reg.type != VKD3DSPR_OUTPOINTID) +- spirv_compiler_emit_input_register(compiler, dst); +-} +- +-static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) +-{ +- const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; +- +- if (dst->reg.type != VKD3DSPR_OUTPUT && dst->reg.type != VKD3DSPR_PATCHCONST) +- spirv_compiler_emit_output_register(compiler, dst); +-} +- + static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) + { +@@ -6822,15 +7045,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler + compiler->spirv_builder.invocation_count = instruction->declaration.count; + } + +-static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void 
spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, ++ enum vkd3d_tessellator_domain domain) + { +- enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; + SpvExecutionMode mode; + +- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) +- return; +- + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: +@@ -6916,15 +7135,10 @@ static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compile + SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); + } + +-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); +- + static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + +- if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) +- spirv_compiler_emit_default_control_point_phase(compiler); +- + vkd3d_spirv_build_op_function_end(builder); + + if (is_in_control_point_phase(compiler)) +@@ -6969,9 +7183,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + phase->function_id = function_id; + /* The insertion location must be set after the label is emitted. 
*/ + phase->function_location = 0; +- +- if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) +- compiler->emit_default_control_point_phase = instruction->flags; + } + + static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) +@@ -7000,63 +7211,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) + } + } + +-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) +-{ +- const struct shader_signature *output_signature = &compiler->output_signature; +- const struct shader_signature *input_signature = &compiler->input_signature; +- uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id; +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- enum vkd3d_shader_component_type component_type; +- struct vkd3d_shader_src_param invocation; +- struct vkd3d_shader_register input_reg; +- unsigned int component_count; +- unsigned int i; +- +- vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); +- spirv_compiler_initialise_block(compiler); +- invocation_id = spirv_compiler_emit_load_invocation_id(compiler); +- +- memset(&invocation, 0, sizeof(invocation)); +- vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_INT, 0); +- invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; +- +- vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2); +- input_reg.idx[0].offset = 0; +- input_reg.idx[0].rel_addr = &invocation; +- input_reg.idx[1].offset = 0; +- input_id = spirv_compiler_get_register_id(compiler, &input_reg); +- +- VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); +- for (i = 0; i < output_signature->element_count; ++i) +- { +- const struct signature_element *output = &output_signature->elements[i]; +- const struct signature_element *input = &input_signature->elements[i]; +- struct vkd3d_shader_register_info output_reg_info; +- struct vkd3d_shader_register output_reg; +- +- 
VKD3D_ASSERT(input->mask == output->mask); +- VKD3D_ASSERT(input->component_type == output->component_type); +- +- input_reg.idx[1].offset = i; +- input_id = spirv_compiler_get_register_id(compiler, &input_reg); +- +- vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); +- output_reg.idx[0].offset = i; +- spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info); +- +- component_type = output->component_type; +- component_count = vsir_write_mask_component_count(output->mask); +- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); +- output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); +- +- dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id); +- +- vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); +- } +- +- vkd3d_spirv_build_op_return(builder); +-} +- + static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, + SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) + { +@@ -7501,7 +7655,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + + general_implementation: + write_mask = dst->write_mask; +- if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) ++ if (data_type_is_64_bit(src->reg.data_type) && !data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_64_from_32(write_mask); + else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_32_from_64(write_mask); +@@ -10214,13 +10368,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_TGSM_STRUCTURED: + spirv_compiler_emit_dcl_tgsm_structured(compiler, instruction); + break; +- case VKD3DSIH_DCL_INPUT_PS: +- case VKD3DSIH_DCL_INPUT: +- spirv_compiler_emit_dcl_input(compiler, instruction); +- 
break; +- case VKD3DSIH_DCL_OUTPUT: +- spirv_compiler_emit_dcl_output(compiler, instruction); +- break; + case VKD3DSIH_DCL_STREAM: + spirv_compiler_emit_dcl_stream(compiler, instruction); + break; +@@ -10239,9 +10386,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + spirv_compiler_emit_output_vertex_count(compiler, instruction); + break; +- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: +- spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); +- break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + spirv_compiler_emit_tessellator_output_primitive(compiler, + instruction->declaration.tessellator_output_primitive); +@@ -10561,11 +10705,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + break; + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: +- case VKD3DSIH_DCL_INPUT_SGV: +- case VKD3DSIH_DCL_INPUT_SIV: +- case VKD3DSIH_DCL_INPUT_PS_SGV: +- case VKD3DSIH_DCL_INPUT_PS_SIV: +- case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: + case VKD3DSIH_DCL_UAV_RAW: +@@ -10586,6 +10725,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + + static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) + { ++ struct vkd3d_shader_dst_param dst; ++ + for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) + spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i); + +@@ -10609,19 +10750,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) + + if (compiler->program->has_point_size) + { +- struct vkd3d_shader_dst_param dst; +- + vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); + dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; +- spirv_compiler_emit_output_register(compiler, &dst); ++ spirv_compiler_emit_io_register(compiler, &dst); + } + + if 
(compiler->program->has_point_coord) + { +- struct vkd3d_shader_dst_param dst; +- + vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); +- spirv_compiler_emit_input_register(compiler, &dst); ++ spirv_compiler_emit_io_register(compiler, &dst); ++ } ++ ++ for (unsigned int i = 0; i < sizeof(compiler->program->io_dcls) * CHAR_BIT; ++i) ++ { ++ /* For hull shaders we internally generate references to OUTPOINTID, ++ * so that must always be enabled. */ ++ if (bitmap_is_set(compiler->program->io_dcls, i) ++ || (compiler->program->shader_version.type == VKD3D_SHADER_TYPE_HULL && i == VKD3DSPR_OUTPOINTID)) ++ { ++ vsir_dst_param_init(&dst, i, VKD3D_DATA_FLOAT, 0); ++ spirv_compiler_emit_io_register(compiler, &dst); ++ } + } + } + +@@ -10677,7 +10826,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compile_info, compiler->message_context)) < 0) + return result; + +- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); + if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) +@@ -10743,6 +10892,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compiler->input_control_point_count = program->input_control_point_count; + compiler->output_control_point_count = program->output_control_point_count; + ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) ++ spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); ++ + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); + +@@ -10823,7 +10975,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + if (compile_info->target_type == 
VKD3D_SHADER_TARGET_SPIRV_TEXT) + { + struct vkd3d_shader_code text; +- if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) ++ if (vkd3d_spirv_binary_to_text(spirv, compile_info->source_name, environment, ++ compiler->formatting, &text, compiler->message_context) != VKD3D_OK) + return VKD3D_ERROR; + vkd3d_shader_free_shader_code(spirv); + *spirv = text; +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 9c41e2c2053..872603052ac 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -166,21 +166,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); + /* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ + #define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 + +-#define VKD3D_SM4_REQUIRES_DOUBLES 0x00000001 +-#define VKD3D_SM4_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002 +-#define VKD3D_SM4_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004 +-#define VKD3D_SM4_REQUIRES_64_UAVS 0x00000008 +-#define VKD3D_SM4_REQUIRES_MINIMUM_PRECISION 0x00000010 +-#define VKD3D_SM4_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020 +-#define VKD3D_SM4_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040 +-#define VKD3D_SM4_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080 +-#define VKD3D_SM4_REQUIRES_TILED_RESOURCES 0x00000100 +-#define VKD3D_SM4_REQUIRES_STENCIL_REF 0x00000200 +-#define VKD3D_SM4_REQUIRES_INNER_COVERAGE 0x00000400 +-#define VKD3D_SM4_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800 +-#define VKD3D_SM4_REQUIRES_ROVS 0x00001000 +-#define VKD3D_SM4_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000 +- + enum vkd3d_sm4_opcode + { + VKD3D_SM4_OP_ADD = 0x00, +@@ -707,6 +692,7 @@ struct vkd3d_sm4_opcode_info + char src_info[SM4_MAX_SRC_COUNT]; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser 
*priv); ++ bool is_conditional_op; + }; + + static const enum vkd3d_primitive_type output_primitive_type_table[] = +@@ -1268,6 +1254,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi + { + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; ++ priv->p.program->tess_domain = ins->declaration.tessellator_domain; + } + + static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1275,6 +1262,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins + { + ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; ++ priv->p.program->tess_partitioning = ins->declaration.tessellator_partitioning; + } + + static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1282,6 +1270,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader + { + ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; ++ priv->p.program->tess_output_primitive = ins->declaration.tessellator_output_primitive; + } + + static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1407,8 +1396,6 @@ struct sm4_stat + + struct tpf_compiler + { +- /* OBJECTIVE: We want to get rid of this HLSL IR specific field. 
*/ +- struct hlsl_ctx *ctx; + struct vsir_program *program; + struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; +@@ -1439,18 +1426,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", + shader_sm4_read_case_condition}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, + {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, + {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, + {VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, + {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, + {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, + {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, +@@ -1468,7 +1455,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, + {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, + {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, + {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, + {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, +@@ -1502,7 +1489,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_ROUND_NE, 
VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, +@@ -1967,16 +1954,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t + return lookup->register_type_info_from_vkd3d[vkd3d_type]; + } + +-static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( +- const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) +-{ +- const struct vkd3d_sm4_register_type_info *register_type_info = +- get_info_from_vkd3d_register_type(lookup, vkd3d_type); +- +- VKD3D_ASSERT(register_type_info); +- return register_type_info->default_src_swizzle_type; +-} +- + static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) + { +@@ -2816,7 +2793,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro + + /* Estimate instruction count to avoid reallocation in most shaders. */ + if (!vsir_program_init(program, compile_info, +- &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) ++ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) + return false; + vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); + sm4->ptr = sm4->start; +@@ -2925,6 +2902,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + program->input_signature = dxbc_desc.input_signature; + program->output_signature = dxbc_desc.output_signature; + program->patch_constant_signature = dxbc_desc.patch_constant_signature; ++ program->features = dxbc_desc.features; + memset(&dxbc_desc, 0, sizeof(dxbc_desc)); + + /* DXBC stores used masks inverted for output signatures, for some reason. 
+@@ -2993,8 +2971,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + return VKD3D_OK; + } + +-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); +- + bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) + { +@@ -3217,18 +3193,17 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s + return true; + } + +-static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, +- uint32_t tag, struct vkd3d_bytecode_buffer *buffer) ++static void add_section(struct tpf_compiler *tpf, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) + { + /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN + * sections to be aligned. Without this, the sections themselves will be + * aligned, but their reported sizes won't. */ + size_t size = bytecode_align(buffer); + +- dxbc_writer_add_section(dxbc, tag, buffer->data, size); ++ dxbc_writer_add_section(&tpf->dxbc, tag, buffer->data, size); + + if (buffer->status < 0) +- ctx->result = buffer->status; ++ tpf->result = buffer->status; + } + + static int signature_element_pointer_compare(const void *x, const void *y) +@@ -3289,7 +3264,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si + set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); + } + +- add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); ++ add_section(tpf, tag, &buffer); + vkd3d_free(sorted_elements); + } + +@@ -3331,6 +3306,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -3507,28 +3483,6 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ + } + } + +-struct 
extern_resource +-{ +- /* var is only not NULL if this resource is a whole variable, so it may be responsible for more +- * than one component. */ +- const struct hlsl_ir_var *var; +- const struct hlsl_buffer *buffer; +- +- char *name; +- bool is_user_packed; +- +- /* The data type of a single component of the resource. +- * This might be different from the data type of the resource itself in 4.0 +- * profiles, where an array (or multi-dimensional array) is handled as a +- * single resource, unlike in 5.0. */ +- struct hlsl_type *component_type; +- +- enum hlsl_regset regset; +- unsigned int id, space, index, bind_count; +- +- struct vkd3d_shader_location loc; +-}; +- + static int sm4_compare_extern_resources(const void *a, const void *b) + { + const struct extern_resource *aa = (const struct extern_resource *)a; +@@ -3544,7 +3498,7 @@ static int sm4_compare_extern_resources(const void *a, const void *b) + return vkd3d_u32_compare(aa->index, bb->index); + } + +-static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) ++void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) + { + unsigned int i; + +@@ -3560,7 +3514,7 @@ static const char *string_skip_tag(const char *string) + return string; + } + +-static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) ++struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) + { + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; + struct extern_resource *extern_resources = NULL; +@@ -3770,7 +3724,7 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl + vkd3d_unreachable(); + } + +-static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) ++void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) + { + uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; +@@ -4000,36 +3954,41 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); + set_u32(&buffer, creator_position, creator_offset); + +- add_section(ctx, dxbc, TAG_RDEF, &buffer); +- + sm4_free_extern_resources(extern_resources, extern_resources_count); ++ ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ return; ++ } ++ rdef->code = buffer.data; ++ rdef->size = buffer.size; + } + +-static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) ++static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type) + { +- switch (type->sampler_dim) ++ switch (resource_type) + { +- case HLSL_SAMPLER_DIM_1D: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_1D: + return VKD3D_SM4_RESOURCE_TEXTURE_1D; +- case HLSL_SAMPLER_DIM_2D: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_2D: + return VKD3D_SM4_RESOURCE_TEXTURE_2D; +- case HLSL_SAMPLER_DIM_3D: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_3D: + return VKD3D_SM4_RESOURCE_TEXTURE_3D; +- case HLSL_SAMPLER_DIM_CUBE: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; +- case HLSL_SAMPLER_DIM_1DARRAY: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; +- case HLSL_SAMPLER_DIM_2DARRAY: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; +- case HLSL_SAMPLER_DIM_2DMS: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_2DMS: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; +- case HLSL_SAMPLER_DIM_2DMSARRAY: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; +- case HLSL_SAMPLER_DIM_CUBEARRAY: ++ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; +- case 
HLSL_SAMPLER_DIM_BUFFER: +- case HLSL_SAMPLER_DIM_RAW_BUFFER: +- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ case VKD3D_SHADER_RESOURCE_BUFFER: + return VKD3D_SM4_RESOURCE_BUFFER; + default: + vkd3d_unreachable(); +@@ -4096,297 +4055,6 @@ struct sm4_instruction + unsigned int idx_src_count; + }; + +-static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, +- const struct hlsl_ir_node *instr) +-{ +- VKD3D_ASSERT(instr->reg.allocated); +- reg->type = VKD3DSPR_TEMP; +- reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = instr->reg.id; +- reg->idx_count = 1; +- *writemask = instr->reg.writemask; +-} +- +-static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, +- enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, +- struct sm4_instruction *sm4_instr) +-{ +- const struct hlsl_ir_var *var = deref->var; +- unsigned int offset_const_deref; +- +- reg->type = type; +- reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; +- reg->dimension = VSIR_DIMENSION_VEC4; +- +- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); +- +- if (!var->indexable) +- { +- offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx[0].offset += offset_const_deref / 4; +- reg->idx_count = 1; +- } +- else +- { +- offset_const_deref = deref->const_offset; +- reg->idx[1].offset = offset_const_deref / 4; +- reg->idx_count = 2; +- +- if (deref->rel_offset.node) +- { +- struct vkd3d_shader_src_param *idx_src; +- unsigned int idx_writemask; +- +- VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); +- idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; +- memset(idx_src, 0, sizeof(*idx_src)); +- +- reg->idx[1].rel_addr = idx_src; +- sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); +- VKD3D_ASSERT(idx_writemask != 0); +- idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); 
+- } +- } +- +- *writemask = 0xf & (0xf << (offset_const_deref % 4)); +- if (var->regs[HLSL_REGSET_NUMERIC].writemask) +- *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); +-} +- +-static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, +- uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); +- const struct hlsl_ir_var *var = deref->var; +- struct hlsl_ctx *ctx = tpf->ctx; +- +- if (var->is_uniform) +- { +- enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); +- +- if (regset == HLSL_REGSET_TEXTURES) +- { +- reg->type = VKD3DSPR_RESOURCE; +- reg->dimension = VSIR_DIMENSION_VEC4; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ +- reg->idx_count = 2; +- } +- else +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx_count = 1; +- } +- VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else if (regset == HLSL_REGSET_UAVS) +- { +- reg->type = VKD3DSPR_UAV; +- reg->dimension = VSIR_DIMENSION_VEC4; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; +- reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ +- reg->idx_count = 2; +- } +- else +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx_count = 1; +- } +- VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else if (regset == HLSL_REGSET_SAMPLERS) +- { 
+- reg->type = VKD3DSPR_SAMPLER; +- reg->dimension = VSIR_DIMENSION_NONE; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ +- reg->idx_count = 2; +- } +- else +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx_count = 1; +- } +- VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; +- +- VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); +- reg->type = VKD3DSPR_CONSTBUFFER; +- reg->dimension = VSIR_DIMENSION_VEC4; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->buffer->reg.id; +- reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ +- reg->idx[2].offset = offset / 4; +- reg->idx_count = 3; +- } +- else +- { +- reg->idx[0].offset = var->buffer->reg.index; +- reg->idx[1].offset = offset / 4; +- reg->idx_count = 2; +- } +- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); +- } +- } +- else if (var->is_input_semantic) +- { +- bool has_idx; +- +- if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); +- +- if (has_idx) +- { +- reg->idx[0].offset = var->semantic.index + offset / 4; +- reg->idx_count = 1; +- } +- +- if (shader_sm4_is_scalar_register(reg)) +- reg->dimension = VSIR_DIMENSION_SCALAR; +- else +- reg->dimension = VSIR_DIMENSION_VEC4; +- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- VKD3D_ASSERT(hlsl_reg.allocated); +- +- if (version->type == VKD3D_SHADER_TYPE_DOMAIN) +- reg->type = VKD3DSPR_PATCHCONST; +- else +- reg->type = 
VKD3DSPR_INPUT; +- reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +- } +- else if (var->is_output_semantic) +- { +- bool has_idx; +- +- if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); +- +- if (has_idx) +- { +- reg->idx[0].offset = var->semantic.index + offset / 4; +- reg->idx_count = 1; +- } +- +- if (shader_sm4_is_scalar_register(reg)) +- reg->dimension = VSIR_DIMENSION_SCALAR; +- else +- reg->dimension = VSIR_DIMENSION_VEC4; +- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- VKD3D_ASSERT(hlsl_reg.allocated); +- reg->type = VKD3DSPR_OUTPUT; +- reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +- } +- else +- { +- enum vkd3d_shader_register_type type = deref->var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; +- +- sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); +- } +-} +- +-static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, +- const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) +-{ +- unsigned int hlsl_swizzle; +- uint32_t writemask; +- +- sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); +- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) +- { +- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +- src->swizzle = swizzle_from_sm4(hlsl_swizzle); +- } +-} +- +-static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) +-{ +- sm4_register_from_node(&dst->reg, &dst->write_mask, instr); +-} +- +-static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, +- const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) +-{ +- src->swizzle = 0; +- src->reg.type = VKD3DSPR_IMMCONST; +- if (width == 1) +- { +- src->reg.dimension = VSIR_DIMENSION_SCALAR; +- src->reg.u.immconst_u32[0] = value->u[0].u; +- } +- else +- { +- unsigned int i, j = 0; +- +- src->reg.dimension = VSIR_DIMENSION_VEC4; +- for (i = 0; i < 4; ++i) +- { +- if ((map_writemask & (1u << i)) && (j < width)) +- src->reg.u.immconst_u32[i] = value->u[j++].u; +- else +- src->reg.u.immconst_u32[i] = 0; +- } +- } +-} +- +-static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, +- const struct hlsl_ir_node *instr, uint32_t map_writemask) +-{ +- unsigned int hlsl_swizzle; +- uint32_t writemask; +- +- if (instr->type == HLSL_IR_CONSTANT) +- { +- struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); +- +- sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); +- return; +- } +- +- 
sm4_register_from_node(&src->reg, &writemask, instr); +- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) +- { +- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +- src->swizzle = swizzle_from_sm4(hlsl_swizzle); +- } +-} +- + static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, + unsigned int i) + { +@@ -4650,204 +4318,41 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s + sm4_update_stat_counters(tpf, instr); + } + +-static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, +- const struct hlsl_ir_node *texel_offset) ++static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { +- struct sm4_instruction_modifier modif; +- struct hlsl_ir_constant *offset; +- +- if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) +- return false; +- offset = hlsl_ir_constant(texel_offset); +- +- modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; +- modif.u.aoffimmi.u = offset->value.u[0].i; +- modif.u.aoffimmi.v = 0; +- modif.u.aoffimmi.w = 0; +- if (offset->node.data_type->dimx > 1) +- modif.u.aoffimmi.v = offset->value.u[1].i; +- if (offset->node.data_type->dimx > 2) +- modif.u.aoffimmi.w = offset->value.u[2].i; +- if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 +- || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 +- || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) +- return false; +- +- instr->modifiers[instr->modifier_count++] = modif; +- return true; +-} +- +-static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) +-{ +- size_t size = (cbuffer->used_size + 3) / 4; ++ const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb; ++ size_t size = (cb->size + 3) / 4; + + struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + +- 
.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, +- .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, +- .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, ++ .srcs[0] = cb->src, + .src_count = 1, + }; + +- if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) + { +- instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; +- instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; +- instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ ++ instr.srcs[0].reg.idx[0].offset = cb->src.reg.idx[0].offset; ++ instr.srcs[0].reg.idx[1].offset = cb->range.first; ++ instr.srcs[0].reg.idx[2].offset = cb->range.last; + instr.srcs[0].reg.idx_count = 3; + + instr.idx[0] = size; +- instr.idx[1] = cbuffer->reg.space; ++ instr.idx[1] = cb->range.space; + instr.idx_count = 2; + } + else + { +- instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; ++ instr.srcs[0].reg.idx[0].offset = cb->range.first; + instr.srcs[0].reg.idx[1].offset = size; + instr.srcs[0].reg.idx_count = 2; + } + +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct extern_resource *resource) +-{ +- unsigned int i; +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_DCL_SAMPLER, +- +- .dsts[0].reg.type = VKD3DSPR_SAMPLER, +- .dst_count = 1, +- }; +- +- VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); +- +- if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) +- instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; +- +- for (i = 0; i < resource->bind_count; ++i) +- { +- if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) +- continue; +- +- if (hlsl_version_ge(tpf->ctx, 5, 1)) +- { +- VKD3D_ASSERT(!i); +- instr.dsts[0].reg.idx[0].offset = resource->id; +- instr.dsts[0].reg.idx[1].offset = resource->index; +- instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ +- 
instr.dsts[0].reg.idx_count = 3; +- +- instr.idx[0] = resource->space; +- instr.idx_count = 1; +- } +- else +- { +- instr.dsts[0].reg.idx[0].offset = resource->index + i; +- instr.dsts[0].reg.idx_count = 1; +- } +- write_sm4_instruction(tpf, &instr); +- } +-} +- +-static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, +- bool uav) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; +- struct hlsl_type *component_type; +- struct sm4_instruction instr; +- bool multisampled; +- unsigned int i; +- +- VKD3D_ASSERT(resource->regset == regset); +- +- component_type = resource->component_type; +- +- for (i = 0; i < resource->bind_count; ++i) +- { +- if (resource->var && !resource->var->objects_usage[regset][i].used) +- continue; +- +- instr = (struct sm4_instruction) +- { +- .dsts[0].reg.type = uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, +- .dsts[0].reg.idx[0].offset = resource->id + i, +- .dsts[0].reg.idx_count = 1, +- .dst_count = 1, +- +- .idx[0] = sm4_data_type(component_type) * 0x1111, +- .idx_count = 1, +- }; +- +- multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS +- || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; +- +- if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) +- { +- hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Multisampled texture object declaration needs sample count for profile %u.%u.", +- version->major, version->minor); +- } +- +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- VKD3D_ASSERT(!i); +- instr.dsts[0].reg.idx[0].offset = resource->id; +- instr.dsts[0].reg.idx[1].offset = resource->index; +- instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ +- instr.dsts[0].reg.idx_count = 3; +- +- instr.idx[1] = resource->space; +- instr.idx_count = 2; +- } +- else +- { +- 
instr.dsts[0].reg.idx[0].offset = resource->index + i; +- instr.dsts[0].reg.idx_count = 1; +- } +- +- if (uav) +- { +- switch (component_type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: +- instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; +- instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; +- break; +- case HLSL_SAMPLER_DIM_RAW_BUFFER: +- instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW; +- break; +- default: +- instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; +- break; +- } +- +- if (component_type->e.resource.rasteriser_ordered) +- instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT; +- } +- else +- { +- switch (component_type->sampler_dim) +- { +- case HLSL_SAMPLER_DIM_RAW_BUFFER: +- instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; +- break; +- default: +- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; +- break; +- } +- } +- instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); +- +- if (multisampled) +- instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; ++ if (ins->flags & VKD3DSI_INDEXED_DYNAMIC) ++ instr.extra_bits |= VKD3D_SM4_INDEX_TYPE_MASK; + +- write_sm4_instruction(tpf, &instr); +- } ++ write_sm4_instruction(tpf, &instr); + } + + static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) +@@ -4924,42 +4429,116 @@ static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vs + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) ++static void tpf_dcl_sampler(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { ++ const struct vkd3d_shader_sampler *sampler = &ins->declaration.sampler; + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, +- .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, ++ .opcode = VKD3D_SM4_OP_DCL_SAMPLER, ++ .extra_bits = 
ins->flags << VKD3D_SM4_SAMPLER_MODE_SHIFT, ++ ++ .dsts[0].reg.type = VKD3DSPR_SAMPLER, ++ .dst_count = 1, + }; + +- write_sm4_instruction(tpf, &instr); +-} ++ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) ++ { ++ instr.dsts[0].reg.idx[0].offset = sampler->src.reg.idx[0].offset; ++ instr.dsts[0].reg.idx[1].offset = sampler->range.first; ++ instr.dsts[0].reg.idx[2].offset = sampler->range.last; ++ instr.dsts[0].reg.idx_count = 3; + +-static void tpf_write_hs_decls(const struct tpf_compiler *tpf) +-{ +- struct sm4_instruction instr = ++ instr.idx[0] = ins->declaration.sampler.range.space; ++ instr.idx_count = 1; ++ } ++ else + { +- .opcode = VKD3D_SM5_OP_HS_DECLS, +- }; ++ instr.dsts[0].reg.idx[0].offset = sampler->range.first; ++ instr.dsts[0].reg.idx_count = 1; ++ } + + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) ++static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, ++ const struct vkd3d_shader_structured_resource *structured_resource = &ins->declaration.structured_resource; ++ const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; ++ const struct vkd3d_shader_version *version = &tpf->program->shader_version; ++ const struct vkd3d_sm4_opcode_info *info; ++ struct sm4_instruction instr = {0}; ++ unsigned int i, k; ++ bool uav; ++ ++ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); ++ VKD3D_ASSERT(info); ++ ++ uav = ins->opcode == VKD3DSIH_DCL_UAV_TYPED ++ || ins->opcode == VKD3DSIH_DCL_UAV_RAW ++ || ins->opcode == VKD3DSIH_DCL_UAV_STRUCTURED; ++ ++ instr.opcode = info->opcode; ++ ++ instr.dsts[0] = semantic->resource.reg; ++ instr.dst_count = 1; ++ ++ for (k = 0; k < 4; ++k) ++ { ++ for (i = ARRAY_SIZE(data_type_table) - 1; i < ARRAY_SIZE(data_type_table); --i) ++ { ++ if 
(semantic->resource_data_type[k] == data_type_table[i]) ++ { ++ instr.idx[0] |= i << (4 * k); ++ break; ++ } ++ } ++ } ++ instr.idx_count = 1; ++ ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ instr.dsts[0].reg.idx[0].offset = semantic->resource.reg.reg.idx[0].offset; ++ instr.dsts[0].reg.idx[1].offset = semantic->resource.range.first; ++ instr.dsts[0].reg.idx[2].offset = semantic->resource.range.last; ++ instr.dsts[0].reg.idx_count = 3; ++ ++ instr.idx[1] = semantic->resource.range.space; ++ instr.idx_count = 2; ++ } ++ else ++ { ++ instr.dsts[0].reg.idx[0].offset = semantic->resource.range.first; ++ instr.dsts[0].reg.idx_count = 1; ++ } ++ ++ if (uav) ++ instr.extra_bits |= ins->flags << VKD3D_SM5_UAV_FLAGS_SHIFT; ++ ++ instr.extra_bits |= (sm4_resource_dimension(ins->resource_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); ++ instr.extra_bits |= semantic->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; ++ ++ if (ins->structured) ++ instr.byte_stride = structured_resource->byte_stride; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, ++ .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, + }; + + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) ++static void tpf_write_hs_decls(const struct tpf_compiler *tpf) + { + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, ++ .opcode = VKD3D_SM5_OP_HS_DECLS, + }; + + write_sm4_instruction(tpf, &instr); +@@ -5022,594 +4601,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ret(const struct tpf_compiler *tpf) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_RET, +- }; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void 
write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, +- const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, +- enum hlsl_sampler_dim dim) +-{ +- const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); +- bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE +- && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); +- bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; +- unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- if (uav) +- instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; +- else if (raw) +- instr.opcode = VKD3D_SM5_OP_LD_RAW; +- else +- instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- if (!uav) +- { +- /* Mipmap level is in the last component in the IR, but needs to be in the W +- * component in the instruction. 
*/ +- unsigned int dim_count = hlsl_sampler_dim_count(dim); +- +- if (dim_count == 1) +- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; +- if (dim_count == 2) +- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; +- } +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask); +- +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- +- instr.src_count = 2; +- +- if (multisampled) +- { +- if (sample_index->type == HLSL_IR_CONSTANT) +- { +- struct vkd3d_shader_register *reg = &instr.srcs[2].reg; +- struct hlsl_ir_constant *index; +- +- index = hlsl_ir_constant(sample_index); +- +- memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); +- reg->type = VKD3DSPR_IMMCONST; +- reg->dimension = VSIR_DIMENSION_SCALAR; +- reg->u.immconst_u32[0] = index->value.u[0].u; +- } +- else if (version->major == 4 && version->minor == 0) +- { +- hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); +- } +- else +- { +- sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0); +- } +- +- ++instr.src_count; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; +- const struct hlsl_ir_node *coords = load->coords.node; +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_deref *sampler = &load->sampler; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- switch (load->load_type) +- { +- case HLSL_RESOURCE_SAMPLE: +- instr.opcode = VKD3D_SM4_OP_SAMPLE; +- break; +- +- case HLSL_RESOURCE_SAMPLE_CMP: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_C; +- break; +- +- case HLSL_RESOURCE_SAMPLE_CMP_LZ: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; +- break; +- +- case 
HLSL_RESOURCE_SAMPLE_LOD: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; +- break; +- +- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_B; +- break; +- +- case HLSL_RESOURCE_SAMPLE_GRAD: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; +- break; +- +- default: +- vkd3d_unreachable(); +- } +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); +- instr.src_count = 3; +- +- if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD +- || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); +- ++instr.src_count; +- } +- else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); +- instr.src_count += 2; +- } +- else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP +- || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); +- ++instr.src_count; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT 
|| dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) +- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER +- || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- { +- hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); +- return; +- } +- +- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_RESINFO; +- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) +- instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_IF, +- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, +- .src_count = 1, +- }; +- +- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); +- +- sm4_src_from_node(tpf, &instr.srcs[0], 
iff->condition.node, VKD3DSP_WRITEMASK_ALL); +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &iff->then_block); +- +- if (!list_empty(&iff->else_block.instrs)) +- { +- instr.opcode = VKD3D_SM4_OP_ELSE; +- instr.src_count = 0; +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &iff->else_block); +- } +- +- instr.opcode = VKD3D_SM4_OP_ENDIF; +- instr.src_count = 0; +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) +-{ +- struct sm4_instruction instr = {0}; +- +- switch (jump->type) +- { +- case HLSL_IR_JUMP_BREAK: +- instr.opcode = VKD3D_SM4_OP_BREAK; +- break; +- +- case HLSL_IR_JUMP_CONTINUE: +- instr.opcode = VKD3D_SM4_OP_CONTINUE; +- break; +- +- case HLSL_IR_JUMP_DISCARD_NZ: +- { +- instr.opcode = VKD3D_SM4_OP_DISCARD; +- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; +- +- memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); +- instr.src_count = 1; +- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); +- break; +- } +- +- case HLSL_IR_JUMP_RETURN: +- vkd3d_unreachable(); +- +- default: +- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); +- return; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-/* Does this variable's data come directly from the API user, rather than being +- * temporary or from a previous shader stage? +- * I.e. is it a uniform or VS input? 
*/ +-static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) +-{ +- if (var->is_uniform) +- return true; +- +- return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; +-} +- +-static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- const struct hlsl_type *type = load->node.data_type; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- +- sm4_dst_from_node(&instr.dsts[0], &load->node); +- instr.dst_count = 1; +- +- VKD3D_ASSERT(hlsl_is_numeric_type(type)); +- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) +- { +- struct hlsl_constant_value value; +- +- /* Uniform bools can be specified as anything, but internal bools always +- * have 0 for false and ~0 for true. Normalize that here. */ +- +- instr.opcode = VKD3D_SM4_OP_MOVC; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); +- +- memset(&value, 0xff, sizeof(value)); +- sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); +- memset(&value, 0, sizeof(value)); +- sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); +- instr.src_count = 3; +- } +- else +- { +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); +- instr.src_count = 1; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_LOOP, +- }; +- +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &loop->body); +- +- instr.opcode = VKD3D_SM4_OP_ENDLOOP; +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_gather(const struct tpf_compiler 
*tpf, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, +- const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- struct vkd3d_shader_src_param *src; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- +- instr.opcode = VKD3D_SM4_OP_GATHER4; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- if (!vkd3d_shader_ver_ge(version, 5, 0)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); +- return; +- } +- instr.opcode = VKD3D_SM5_OP_GATHER4_PO; +- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); +- } +- } +- +- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr); +- +- src = &instr.srcs[instr.src_count++]; +- sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); +- src->reg.dimension = VSIR_DIMENSION_VEC4; +- src->swizzle = swizzle; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; +- const struct hlsl_ir_node *sample_index = load->sample_index.node; +- const struct hlsl_ir_node *coords = load->coords.node; +- +- if (load->sampler.var && !load->sampler.var->is_uniform) +- { +- hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); +- return; +- } +- +- if (!load->resource.var->is_uniform) +- { +- hlsl_fixme(tpf->ctx, 
&load->node.loc, "Load from non-uniform resource variable."); +- return; +- } +- +- switch (load->load_type) +- { +- case HLSL_RESOURCE_LOAD: +- write_sm4_ld(tpf, &load->node, &load->resource, +- coords, sample_index, texel_offset, load->sampling_dim); +- break; +- +- case HLSL_RESOURCE_SAMPLE: +- case HLSL_RESOURCE_SAMPLE_CMP: +- case HLSL_RESOURCE_SAMPLE_CMP_LZ: +- case HLSL_RESOURCE_SAMPLE_LOD: +- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: +- case HLSL_RESOURCE_SAMPLE_GRAD: +- /* Combined sample expressions were lowered. */ +- VKD3D_ASSERT(load->sampler.var); +- write_sm4_sample(tpf, load); +- break; +- +- case HLSL_RESOURCE_GATHER_RED: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_GREEN: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_BLUE: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_ALPHA: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset); +- break; +- +- case HLSL_RESOURCE_SAMPLE_INFO: +- write_sm4_sampleinfo(tpf, load); +- break; +- +- case HLSL_RESOURCE_RESINFO: +- write_sm4_resinfo(tpf, load); +- break; +- +- case HLSL_RESOURCE_SAMPLE_PROJ: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) +-{ +- struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); +- struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; +- struct sm4_instruction instr; +- +- if (!store->resource.var->is_uniform) +- { +- hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource 
variable."); +- return; +- } +- +- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- { +- hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); +- return; +- } +- +- memset(&instr, 0, sizeof(instr)); +- +- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr); +- instr.dst_count = 1; +- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) +- { +- instr.opcode = VKD3D_SM5_OP_STORE_RAW; +- instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx); +- } +- else +- { +- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; +- } +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) +-{ +- const struct hlsl_ir_node *rhs = store->rhs.node; +- struct sm4_instruction instr; +- uint32_t writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); +- instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) +-{ +- const struct hlsl_ir_node *selector = s->selector.node; +- struct hlsl_ir_switch_case *c; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SWITCH; +- +- sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +- +- LIST_FOR_EACH_ENTRY(c, 
&s->cases, struct hlsl_ir_switch_case, entry) +- { +- memset(&instr, 0, sizeof(instr)); +- if (c->is_default) +- { +- instr.opcode = VKD3D_SM4_OP_DEFAULT; +- } +- else +- { +- struct hlsl_constant_value value = { .u[0].u = c->value }; +- +- instr.opcode = VKD3D_SM4_OP_CASE; +- sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 1; +- } +- +- write_sm4_instruction(tpf, &instr); +- write_sm4_block(tpf, &c->body); +- } +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_ENDSWITCH; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) +-{ +- unsigned int hlsl_swizzle; +- struct sm4_instruction instr; +- uint32_t writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); +- instr.dst_count = 1; +- +- sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); +- hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), +- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); +- instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- + static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { ++ struct sm4_instruction_modifier *modifier; + const struct vkd3d_sm4_opcode_info *info; + struct sm4_instruction instr = {0}; + unsigned int dst_count, src_count; +@@ -5655,6 +4649,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + for (unsigned int i = 0; i < ins->src_count; ++i) + instr.srcs[i] = ins->src[i]; + ++ if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w) ++ { ++ VKD3D_ASSERT(instr.modifier_count < ARRAY_SIZE(instr.modifiers)); ++ modifier = 
&instr.modifiers[instr.modifier_count++]; ++ modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI; ++ modifier->u.aoffimmi.u = ins->texel_offset.u; ++ modifier->u.aoffimmi.v = ins->texel_offset.v; ++ modifier->u.aoffimmi.w = ins->texel_offset.w; ++ } ++ ++ if (info->is_conditional_op) ++ { ++ if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ) ++ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; ++ } ++ + write_sm4_instruction(tpf, &instr); + } + +@@ -5662,6 +4672,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + { + switch (ins->opcode) + { ++ case VKD3DSIH_DCL_CONSTANT_BUFFER: ++ tpf_dcl_constant_buffer(tpf, ins); ++ break; ++ + case VKD3DSIH_DCL_TEMPS: + tpf_dcl_temps(tpf, ins->declaration.count); + break; +@@ -5702,8 +4716,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); + break; + ++ case VKD3DSIH_DCL_SAMPLER: ++ tpf_dcl_sampler(tpf, ins); ++ break; ++ ++ case VKD3DSIH_DCL: ++ case VKD3DSIH_DCL_RESOURCE_RAW: ++ case VKD3DSIH_DCL_UAV_RAW: ++ case VKD3DSIH_DCL_UAV_STRUCTURED: ++ case VKD3DSIH_DCL_UAV_TYPED: ++ tpf_dcl_texture(tpf, ins); ++ break; ++ + case VKD3DSIH_ADD: + case VKD3DSIH_AND: ++ case VKD3DSIH_BREAK: ++ case VKD3DSIH_CASE: ++ case VKD3DSIH_CONTINUE: ++ case VKD3DSIH_DEFAULT: ++ case VKD3DSIH_DISCARD: + case VKD3DSIH_DIV: + case VKD3DSIH_DP2: + case VKD3DSIH_DP3: +@@ -5714,6 +4745,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_DSY: + case VKD3DSIH_DSY_COARSE: + case VKD3DSIH_DSY_FINE: ++ case VKD3DSIH_ELSE: ++ case VKD3DSIH_ENDIF: ++ case VKD3DSIH_ENDLOOP: ++ case VKD3DSIH_ENDSWITCH: + case VKD3DSIH_EQO: + case VKD3DSIH_EXP: + case VKD3DSIH_F16TOF32: +@@ -5721,9 +4756,14 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_FRC: + case VKD3DSIH_FTOI: + case VKD3DSIH_FTOU: ++ case 
VKD3DSIH_GATHER4: ++ case VKD3DSIH_GATHER4_PO: + case VKD3DSIH_GEO: ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_IADD: + case VKD3DSIH_IEQ: ++ case VKD3DSIH_IF: + case VKD3DSIH_IGE: + case VKD3DSIH_ILT: + case VKD3DSIH_IMAD: +@@ -5735,7 +4775,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_ITOF: ++ case VKD3DSIH_LD: ++ case VKD3DSIH_LD2DMS: ++ case VKD3DSIH_LD_RAW: ++ case VKD3DSIH_LD_UAV_TYPED: + case VKD3DSIH_LOG: ++ case VKD3DSIH_LOOP: + case VKD3DSIH_LTO: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: +@@ -5747,14 +4792,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_NOT: + case VKD3DSIH_OR: + case VKD3DSIH_RCP: ++ case VKD3DSIH_RESINFO: ++ case VKD3DSIH_RET: + case VKD3DSIH_ROUND_NE: + case VKD3DSIH_ROUND_NI: + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: ++ case VKD3DSIH_SAMPLE: ++ case VKD3DSIH_SAMPLE_B: ++ case VKD3DSIH_SAMPLE_C: ++ case VKD3DSIH_SAMPLE_C_LZ: ++ case VKD3DSIH_SAMPLE_GRAD: + case VKD3DSIH_SAMPLE_INFO: ++ case VKD3DSIH_SAMPLE_LOD: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SQRT: ++ case VKD3DSIH_STORE_RAW: ++ case VKD3DSIH_STORE_UAV_TYPED: ++ case VKD3DSIH_SWITCH: + case VKD3DSIH_UDIV: + case VKD3DSIH_UGE: + case VKD3DSIH_ULT: +@@ -5772,102 +4828,23 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + } + } + +-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) ++static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program) + { +- const struct hlsl_ir_node *instr; +- unsigned int vsir_instr_idx; +- +- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +- { +- if (instr->data_type) +- { +- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) +- { +- hlsl_fixme(tpf->ctx, 
&instr->loc, "Class %#x should have been lowered or removed.", +- instr->data_type->class); +- break; +- } +- +- if (!instr->reg.allocated) +- { +- VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); +- continue; +- } +- } +- +- switch (instr->type) +- { +- case HLSL_IR_CALL: +- case HLSL_IR_CONSTANT: +- vkd3d_unreachable(); +- +- case HLSL_IR_IF: +- write_sm4_if(tpf, hlsl_ir_if(instr)); +- break; +- +- case HLSL_IR_JUMP: +- write_sm4_jump(tpf, hlsl_ir_jump(instr)); +- break; +- +- case HLSL_IR_LOAD: +- write_sm4_load(tpf, hlsl_ir_load(instr)); +- break; +- +- case HLSL_IR_RESOURCE_LOAD: +- write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); +- break; +- +- case HLSL_IR_RESOURCE_STORE: +- write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); +- break; +- +- case HLSL_IR_LOOP: +- write_sm4_loop(tpf, hlsl_ir_loop(instr)); +- break; +- +- case HLSL_IR_STORE: +- write_sm4_store(tpf, hlsl_ir_store(instr)); +- break; +- +- case HLSL_IR_SWITCH: +- write_sm4_switch(tpf, hlsl_ir_switch(instr)); +- break; +- +- case HLSL_IR_SWIZZLE: +- write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); +- break; +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; +- tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); +- } +- } +-} ++ unsigned int i; + +-static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) +-{ + if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) + tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); + +- write_sm4_block(tpf, &func->body); +- +- write_sm4_ret(tpf); ++ for (i = 0; i < program->instructions.count; ++i) ++ tpf_handle_instruction(tpf, &program->instructions.elements[i]); + } + +-static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl 
*entry_func) ++static void tpf_write_shdr(struct tpf_compiler *tpf) + { +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; ++ const struct vsir_program *program = tpf->program; ++ const struct vkd3d_shader_version *version; + struct vkd3d_bytecode_buffer buffer = {0}; +- struct extern_resource *extern_resources; +- unsigned int extern_resources_count, i; +- const struct hlsl_buffer *cbuffer; +- struct hlsl_ctx *ctx = tpf->ctx; + size_t token_count_position; +- uint32_t global_flags = 0; + + static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = + { +@@ -5884,101 +4861,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec + + tpf->buffer = &buffer; + +- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); +- ++ version = &program->shader_version; + put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); + token_count_position = put_u32(&buffer, 0); + +- if (version->major == 4) +- { +- for (i = 0; i < extern_resources_count; ++i) +- { +- const struct extern_resource *resource = &extern_resources[i]; +- const struct hlsl_type *type = resource->component_type; +- +- if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) +- { +- global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; +- break; +- } +- } +- } +- +- if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) +- global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; +- +- if (global_flags) +- write_sm4_dcl_global_flags(tpf, global_flags); ++ if (program->global_flags) ++ write_sm4_dcl_global_flags(tpf, program->global_flags); + + if (version->type == VKD3D_SHADER_TYPE_HULL) + { + tpf_write_hs_decls(tpf); + +- tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ +- tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); +- tpf_write_dcl_tessellator_domain(tpf, 
ctx->domain); +- tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); +- tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); ++ tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); ++ tpf_write_dcl_output_control_point_count(tpf, program->output_control_point_count); ++ tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); ++ tpf_write_dcl_tessellator_partitioning(tpf, program->tess_partitioning); ++ tpf_write_dcl_tessellator_output_primitive(tpf, program->tess_output_primitive); + } + else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) + { +- tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ +- tpf_write_dcl_tessellator_domain(tpf, ctx->domain); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (cbuffer->reg.allocated) +- write_sm4_dcl_constant_buffer(tpf, cbuffer); +- } +- +- for (i = 0; i < extern_resources_count; ++i) +- { +- const struct extern_resource *resource = &extern_resources[i]; +- +- if (resource->regset == HLSL_REGSET_SAMPLERS) +- write_sm4_dcl_samplers(tpf, resource); +- else if (resource->regset == HLSL_REGSET_TEXTURES) +- write_sm4_dcl_textures(tpf, resource, false); +- else if (resource->regset == HLSL_REGSET_UAVS) +- write_sm4_dcl_textures(tpf, resource, true); ++ tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); ++ tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); + } + +- if (version->type == VKD3D_SHADER_TYPE_HULL) +- tpf_write_hs_control_point_phase(tpf); +- +- tpf_write_shader_function(tpf, entry_func); +- +- if (version->type == VKD3D_SHADER_TYPE_HULL) +- { +- tpf_write_hs_fork_phase(tpf); +- tpf_write_shader_function(tpf, ctx->patch_constant_func); +- } ++ tpf_write_program(tpf, program); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + +- add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); ++ add_section(tpf, 
TAG_SHDR, &buffer); + tpf->buffer = NULL; +- +- sm4_free_extern_resources(extern_resources, extern_resources_count); + } + + static void tpf_write_sfi0(struct tpf_compiler *tpf) + { +- struct extern_resource *extern_resources; +- unsigned int extern_resources_count; +- struct hlsl_ctx *ctx = tpf->ctx; + uint64_t *flags; + + flags = vkd3d_calloc(1, sizeof(*flags)); + +- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); +- for (unsigned int i = 0; i < extern_resources_count; ++i) +- { +- if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) +- *flags |= VKD3D_SM4_REQUIRES_ROVS; +- } +- sm4_free_extern_resources(extern_resources, extern_resources_count); ++ if (tpf->program->features.rovs) ++ *flags |= DXBC_SFI0_REQUIRES_ROVS; + + /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, + * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ +@@ -5993,7 +4914,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) + { + struct vkd3d_bytecode_buffer buffer = {0}; + const struct sm4_stat *stat = tpf->stat; +- struct hlsl_ctx *ctx = tpf->ctx; + + put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); +@@ -6025,7 +4945,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) + put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); + put_u32(&buffer, 0); /* Sample frequency */ + +- if (hlsl_version_ge(ctx, 5, 0)) ++ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 0)) + { + put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); + put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); +@@ -6037,15 +4957,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) + put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); + } + +- add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); ++ add_section(tpf, TAG_STAT, &buffer); ++} ++ ++static void tpf_write_section(struct tpf_compiler *tpf, uint32_t tag, const struct 
vkd3d_shader_code *code) ++{ ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ ++ bytecode_put_bytes(&buffer, code->code, code->size); ++ add_section(tpf, tag, &buffer); + } + +-/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving +- * data from the other parameters instead, so they can be removed from the +- * arguments and this function can be independent of HLSL structs. */ +-int tpf_compile(struct vsir_program *program, uint64_t config_flags, +- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, +- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) + { + enum vkd3d_shader_type shader_type = program->shader_version.type; + struct tpf_compiler tpf = {0}; +@@ -6053,7 +4977,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, + size_t i; + int ret; + +- tpf.ctx = ctx; + tpf.program = program; + tpf.buffer = NULL; + tpf.stat = &stat; +@@ -6064,14 +4987,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, + tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); + if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) + tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); +- write_sm4_rdef(ctx, &tpf.dxbc); +- tpf_write_shdr(&tpf, entry_func); ++ tpf_write_section(&tpf, TAG_RDEF, rdef); ++ tpf_write_shdr(&tpf); + tpf_write_sfi0(&tpf); + tpf_write_stat(&tpf); + + ret = VKD3D_OK; +- if (ctx->result) +- ret = ctx->result; + if (tpf.result) + ret = tpf.result; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index d751f2dc6bf..86ec8f15fb7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -23,6 +23,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context + + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + size_t combined_samplers_size; ++ ++ enum vkd3d_shader_tessellator_output_primitive output_primitive; ++ enum vkd3d_shader_tessellator_partitioning partitioning; + }; + + static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, +@@ -1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, + instruction->declaration.structured_resource.byte_stride, false, instruction->flags); + break; ++ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: ++ context->output_primitive = instruction->declaration.tessellator_output_primitive; ++ break; ++ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: ++ context->partitioning = instruction->declaration.tessellator_partitioning; ++ break; + case VKD3DSIH_IF: + case VKD3DSIH_IFC: + cf_info = vkd3d_shader_scan_push_cf_info(context); +@@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) + { + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; ++ struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; + struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; +@@ -1530,6 +1542,8 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + descriptor_info1 = &local_descriptor_info1; + } + ++ tessellation_info = 
vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); ++ + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, + descriptor_info1, combined_sampler_info, message_context); + +@@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + if (!ret && descriptor_info) + ret = convert_descriptor_info(descriptor_info, descriptor_info1); + ++ if (!ret && tessellation_info) ++ { ++ tessellation_info->output_primitive = context.output_primitive; ++ tessellation_info->partitioning = context.partitioning; ++ } ++ + if (ret < 0) + { + if (combined_sampler_info) +@@ -1959,7 +1979,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + static const enum vkd3d_shader_target_type dxbc_tpf_types[] = + { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +-#ifdef HAVE_SPIRV_TOOLS ++#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + VKD3D_SHADER_TARGET_SPIRV_TEXT, + #endif + VKD3D_SHADER_TARGET_D3D_ASM, +@@ -1974,7 +1994,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + static const enum vkd3d_shader_target_type hlsl_types[] = + { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +-#ifdef HAVE_SPIRV_TOOLS ++#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + VKD3D_SHADER_TARGET_SPIRV_TEXT, + #endif + VKD3D_SHADER_TARGET_D3D_ASM, +@@ -1986,7 +2006,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + static const enum vkd3d_shader_target_type d3dbc_types[] = + { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +-#ifdef HAVE_SPIRV_TOOLS ++#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + VKD3D_SHADER_TARGET_SPIRV_TEXT, + #endif + VKD3D_SHADER_TARGET_D3D_ASM, +@@ -1996,7 +2016,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( + static const enum vkd3d_shader_target_type dxbc_dxil_types[] = + { + 
VKD3D_SHADER_TARGET_SPIRV_BINARY, +-# ifdef HAVE_SPIRV_TOOLS ++#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) + VKD3D_SHADER_TARGET_SPIRV_TEXT, + # endif + VKD3D_SHADER_TARGET_D3D_ASM, +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index be7c0b73a22..3bfb0a7c3cd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -62,6 +62,8 @@ + #define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) + #define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) + ++#define VKD3D_MAX_STREAM_COUNT 4 ++ + enum vkd3d_shader_error + { + VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, +@@ -163,6 +165,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037, + VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038, + VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, ++ VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -247,6 +250,7 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, + VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, + VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, ++ VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE = 9023, + + VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, + +@@ -1123,6 +1127,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, + enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); + void shader_signature_cleanup(struct shader_signature *signature); + ++struct vsir_features ++{ ++ /* The shader requires rasteriser-ordered views. 
*/ ++ bool rovs; ++}; ++ + struct dxbc_shader_desc + { + const uint32_t *byte_code; +@@ -1131,6 +1141,7 @@ struct dxbc_shader_desc + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; ++ struct vsir_features features; + }; + + struct vkd3d_shader_register_semantic +@@ -1400,9 +1411,10 @@ enum vsir_control_flow_type + + enum vsir_normalisation_level + { +- VSIR_NOT_NORMALISED, ++ VSIR_NORMALISED_SM1, ++ VSIR_NORMALISED_SM4, + VSIR_NORMALISED_HULL_CONTROL_POINT_IO, +- VSIR_FULLY_NORMALISED_IO, ++ VSIR_NORMALISED_SM6, + }; + + struct vsir_program +@@ -1428,9 +1440,16 @@ struct vsir_program + bool use_vocp; + bool has_point_size; + bool has_point_coord; ++ bool has_fog; + uint8_t diffuse_written_mask; + enum vsir_control_flow_type cf_type; + enum vsir_normalisation_level normalisation_level; ++ enum vkd3d_tessellator_domain tess_domain; ++ enum vkd3d_shader_tessellator_partitioning tess_partitioning; ++ enum vkd3d_shader_tessellator_output_primitive tess_output_primitive; ++ uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; ++ ++ struct vsir_features features; + + const char **block_names; + size_t block_name_count; +@@ -1643,6 +1662,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, + int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, + struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); + ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); ++ + int glsl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, + const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, +@@ -1661,6 +1684,9 @@ int 
msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); + ++int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); ++ + enum vkd3d_md5_variant + { + VKD3D_MD5_STANDARD, +@@ -1942,6 +1968,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, + + #define DXBC_MAX_SECTION_COUNT 7 + ++#define DXBC_SFI0_REQUIRES_DOUBLES 0x00000001u ++#define DXBC_SFI0_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002u ++#define DXBC_SFI0_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004u ++#define DXBC_SFI0_REQUIRES_64_UAVS 0x00000008u ++#define DXBC_SFI0_REQUIRES_MINIMUM_PRECISION 0x00000010u ++#define DXBC_SFI0_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020u ++#define DXBC_SFI0_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040u ++#define DXBC_SFI0_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080u ++#define DXBC_SFI0_REQUIRES_TILED_RESOURCES 0x00000100u ++#define DXBC_SFI0_REQUIRES_STENCIL_REF 0x00000200u ++#define DXBC_SFI0_REQUIRES_INNER_COVERAGE 0x00000400u ++#define DXBC_SFI0_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800u ++#define DXBC_SFI0_REQUIRES_ROVS 0x00001000u ++#define DXBC_SFI0_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000u ++ + struct dxbc_writer + { + unsigned int section_count; +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index a55a97f6f2f..6c7bf167910 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -1255,6 +1255,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint + return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore)); + } + ++static void vkd3d_vk_descriptor_pool_array_cleanup(struct 
vkd3d_vk_descriptor_pool_array *array) ++{ ++ vkd3d_free(array->pools); ++} ++ ++static void vkd3d_vk_descriptor_pool_array_init(struct vkd3d_vk_descriptor_pool_array *array) ++{ ++ memset(array, 0, sizeof(*array)); ++} ++ ++static bool vkd3d_vk_descriptor_pool_array_push_array(struct vkd3d_vk_descriptor_pool_array *array, ++ const struct vkd3d_vk_descriptor_pool *pools, size_t count) ++{ ++ if (!vkd3d_array_reserve((void **)&array->pools, &array->capacity, array->count + count, sizeof(*array->pools))) ++ return false; ++ ++ memcpy(&array->pools[array->count], pools, count * sizeof(*pools)); ++ array->count += count; ++ ++ return true; ++} ++ ++static bool vkd3d_vk_descriptor_pool_array_push(struct vkd3d_vk_descriptor_pool_array *array, ++ unsigned int descriptor_count, VkDescriptorPool vk_pool) ++{ ++ struct vkd3d_vk_descriptor_pool pool = ++ { ++ .descriptor_count = descriptor_count, ++ .vk_pool = vk_pool, ++ }; ++ ++ return vkd3d_vk_descriptor_pool_array_push_array(array, &pool, 1); ++} ++ ++static VkDescriptorPool vkd3d_vk_descriptor_pool_array_find(struct vkd3d_vk_descriptor_pool_array *array, ++ unsigned int *descriptor_count) ++{ ++ VkDescriptorPool vk_pool; ++ size_t i; ++ ++ for (i = 0; i < array->count; ++i) ++ { ++ if (array->pools[i].descriptor_count >= *descriptor_count) ++ { ++ *descriptor_count = array->pools[i].descriptor_count; ++ vk_pool = array->pools[i].vk_pool; ++ array->pools[i] = array->pools[--array->count]; ++ ++ return vk_pool; ++ } ++ } ++ ++ return VK_NULL_HANDLE; ++} ++ ++static void vkd3d_vk_descriptor_pool_array_destroy_pools(struct vkd3d_vk_descriptor_pool_array *array, ++ const struct d3d12_device *device) ++{ ++ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; ++ size_t i; ++ ++ for (i = 0; i < array->count; ++i) ++ { ++ VK_CALL(vkDestroyDescriptorPool(device->vk_device, array->pools[i].vk_pool, NULL)); ++ } ++ array->count = 0; ++} ++ + /* Command buffers */ + static void d3d12_command_list_mark_as_invalid(struct 
d3d12_command_list *list, + const char *message, ...) +@@ -1376,18 +1444,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat + return true; + } + +-static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator, +- VkDescriptorPool pool) +-{ +- if (!vkd3d_array_reserve((void **)&allocator->descriptor_pools, &allocator->descriptor_pools_size, +- allocator->descriptor_pool_count + 1, sizeof(*allocator->descriptor_pools))) +- return false; +- +- allocator->descriptor_pools[allocator->descriptor_pool_count++] = pool; +- +- return true; +-} +- + static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, + struct vkd3d_view *view) + { +@@ -1426,37 +1482,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all + } + + static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( +- struct d3d12_command_allocator *allocator) ++ struct d3d12_command_allocator *allocator, enum vkd3d_shader_descriptor_type descriptor_type, ++ unsigned int descriptor_count, bool unbounded) + { + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + struct VkDescriptorPoolCreateInfo pool_desc; + VkDevice vk_device = device->vk_device; ++ VkDescriptorPoolSize vk_pool_sizes[2]; ++ unsigned int pool_size, pool_limit; + VkDescriptorPool vk_pool; + VkResult vr; + +- if (allocator->free_descriptor_pool_count > 0) +- { +- vk_pool = allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1]; +- allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1] = VK_NULL_HANDLE; +- --allocator->free_descriptor_pool_count; +- } +- else ++ if (!(vk_pool = vkd3d_vk_descriptor_pool_array_find(&allocator->free_descriptor_pools[descriptor_type], ++ &descriptor_count))) + { ++ pool_limit = device->vk_pool_limits[descriptor_type]; ++ ++ if (descriptor_count > pool_limit) ++ { ++ if 
(!unbounded) ++ { ++ ERR("Descriptor count %u exceeds maximum pool size %u.\n", descriptor_count, pool_limit); ++ return VK_NULL_HANDLE; ++ } ++ ++ WARN("Clamping descriptor count %u to maximum pool size %u for unbounded allocation.\n", ++ descriptor_count, pool_limit); ++ descriptor_count = pool_limit; ++ } ++ ++ pool_size = allocator->vk_pool_sizes[descriptor_type]; ++ if (descriptor_count > pool_size) ++ { ++ pool_size = 1u << (vkd3d_log2i(descriptor_count - 1) + 1); ++ pool_size = min(pool_limit, pool_size); ++ } ++ descriptor_count = pool_size; ++ ++ vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true); ++ vk_pool_sizes[0].descriptorCount = descriptor_count; ++ ++ vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false); ++ vk_pool_sizes[1].descriptorCount = descriptor_count; ++ + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_desc.pNext = NULL; + pool_desc.flags = 0; + pool_desc.maxSets = 512; +- pool_desc.poolSizeCount = device->vk_pool_count; +- pool_desc.pPoolSizes = device->vk_pool_sizes; ++ pool_desc.poolSizeCount = 1; ++ if (vk_pool_sizes[1].type != vk_pool_sizes[0].type) ++ ++pool_desc.poolSizeCount; ++ pool_desc.pPoolSizes = vk_pool_sizes; ++ + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) + { + ERR("Failed to create descriptor pool, vr %d.\n", vr); + return VK_NULL_HANDLE; + } ++ ++ if (!unbounded || descriptor_count < pool_limit) ++ allocator->vk_pool_sizes[descriptor_type] = min(pool_limit, descriptor_count * 2); + } + +- if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool))) ++ if (!(vkd3d_vk_descriptor_pool_array_push(&allocator->descriptor_pools[descriptor_type], ++ descriptor_count, vk_pool))) + { + ERR("Failed to add descriptor pool.\n"); + VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL)); +@@ -1466,9 +1556,9 @@ static VkDescriptorPool 
d3d12_command_allocator_allocate_descriptor_pool( + return vk_pool; + } + +-static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( +- struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout, +- unsigned int variable_binding_size, bool unbounded) ++static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(struct d3d12_command_allocator *allocator, ++ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int descriptor_count, ++ VkDescriptorSetLayout vk_set_layout, unsigned int variable_binding_size, bool unbounded) + { + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +@@ -1478,14 +1568,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( + VkDescriptorSet vk_descriptor_set; + VkResult vr; + +- if (!allocator->vk_descriptor_pool) +- allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); +- if (!allocator->vk_descriptor_pool) ++ if (!allocator->vk_descriptor_pools[descriptor_type]) ++ allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, ++ descriptor_type, descriptor_count, unbounded); ++ if (!allocator->vk_descriptor_pools[descriptor_type]) + return VK_NULL_HANDLE; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_desc.pNext = NULL; +- set_desc.descriptorPool = allocator->vk_descriptor_pool; ++ set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; + set_desc.descriptorSetCount = 1; + set_desc.pSetLayouts = &vk_set_layout; + if (unbounded) +@@ -1499,16 +1590,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( + if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0) + return vk_descriptor_set; + +- allocator->vk_descriptor_pool = VK_NULL_HANDLE; ++ allocator->vk_descriptor_pools[descriptor_type] = VK_NULL_HANDLE; + 
if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR) +- allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); +- if (!allocator->vk_descriptor_pool) ++ allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, ++ descriptor_type, descriptor_count, unbounded); ++ if (!allocator->vk_descriptor_pools[descriptor_type]) + { + ERR("Failed to allocate descriptor set, vr %d.\n", vr); + return VK_NULL_HANDLE; + } + +- set_desc.descriptorPool = allocator->vk_descriptor_pool; ++ set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; + if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0) + { + FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr); +@@ -1534,38 +1626,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic + VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); + } + ++static void d3d12_command_allocator_reset_descriptor_pool_array(struct d3d12_command_allocator *allocator, ++ enum vkd3d_shader_descriptor_type type) ++{ ++ struct vkd3d_vk_descriptor_pool_array *array = &allocator->descriptor_pools[type]; ++ struct d3d12_device *device = allocator->device; ++ const struct vkd3d_vk_device_procs *vk_procs; ++ const struct vkd3d_vk_descriptor_pool *pool; ++ size_t i; ++ ++ vk_procs = &device->vk_procs; ++ for (i = 0; i < array->count; ++i) ++ { ++ pool = &array->pools[i]; ++ if (pool->descriptor_count < allocator->vk_pool_sizes[type] ++ || !vkd3d_vk_descriptor_pool_array_push_array(&allocator->free_descriptor_pools[type], pool, 1)) ++ VK_CALL(vkDestroyDescriptorPool(device->vk_device, pool->vk_pool, NULL)); ++ else ++ VK_CALL(vkResetDescriptorPool(device->vk_device, pool->vk_pool, 0)); ++ } ++ array->count = 0; ++} ++ + static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator, + bool 
keep_reusable_resources) + { + struct d3d12_device *device = allocator->device; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; +- unsigned int i, j; ++ unsigned int i; + +- allocator->vk_descriptor_pool = VK_NULL_HANDLE; ++ memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools)); + + if (keep_reusable_resources) + { +- if (vkd3d_array_reserve((void **)&allocator->free_descriptor_pools, +- &allocator->free_descriptor_pools_size, +- allocator->free_descriptor_pool_count + allocator->descriptor_pool_count, +- sizeof(*allocator->free_descriptor_pools))) ++ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) + { +- for (i = 0, j = allocator->free_descriptor_pool_count; i < allocator->descriptor_pool_count; ++i, ++j) +- { +- VK_CALL(vkResetDescriptorPool(device->vk_device, allocator->descriptor_pools[i], 0)); +- allocator->free_descriptor_pools[j] = allocator->descriptor_pools[i]; +- } +- allocator->free_descriptor_pool_count += allocator->descriptor_pool_count; +- allocator->descriptor_pool_count = 0; ++ d3d12_command_allocator_reset_descriptor_pool_array(allocator, i); + } + } + else + { +- for (i = 0; i < allocator->free_descriptor_pool_count; ++i) ++ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) + { +- VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->free_descriptor_pools[i], NULL)); ++ vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->free_descriptor_pools[i], device); + } +- allocator->free_descriptor_pool_count = 0; + } + + for (i = 0; i < allocator->transfer_buffer_count; ++i) +@@ -1586,11 +1690,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato + } + allocator->view_count = 0; + +- for (i = 0; i < allocator->descriptor_pool_count; ++i) ++ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) + { +- VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->descriptor_pools[i], NULL)); ++ 
vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->descriptor_pools[i], device); + } +- allocator->descriptor_pool_count = 0; + + for (i = 0; i < allocator->framebuffer_count; ++i) + { +@@ -1647,6 +1750,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo + { + struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); + unsigned int refcount = vkd3d_atomic_decrement_u32(&allocator->refcount); ++ size_t i; + + TRACE("%p decreasing refcount to %u.\n", allocator, refcount); + +@@ -1664,8 +1768,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo + vkd3d_free(allocator->transfer_buffers); + vkd3d_free(allocator->buffer_views); + vkd3d_free(allocator->views); +- vkd3d_free(allocator->descriptor_pools); +- vkd3d_free(allocator->free_descriptor_pools); ++ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) ++ { ++ vkd3d_vk_descriptor_pool_array_cleanup(&allocator->descriptor_pools[i]); ++ vkd3d_vk_descriptor_pool_array_cleanup(&allocator->free_descriptor_pools[i]); ++ } + vkd3d_free(allocator->framebuffers); + vkd3d_free(allocator->passes); + +@@ -1822,6 +1929,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo + struct vkd3d_queue *queue; + VkResult vr; + HRESULT hr; ++ size_t i; + + if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) + return hr; +@@ -1851,11 +1959,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo + return hresult_from_vk_result(vr); + } + +- allocator->vk_descriptor_pool = VK_NULL_HANDLE; ++ memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools)); + +- allocator->free_descriptor_pools = NULL; +- allocator->free_descriptor_pools_size = 0; +- allocator->free_descriptor_pool_count = 0; ++ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) ++ { ++ 
vkd3d_vk_descriptor_pool_array_init(&allocator->free_descriptor_pools[i]); ++ } + + allocator->passes = NULL; + allocator->passes_size = 0; +@@ -1865,9 +1974,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo + allocator->framebuffers_size = 0; + allocator->framebuffer_count = 0; + +- allocator->descriptor_pools = NULL; +- allocator->descriptor_pools_size = 0; +- allocator->descriptor_pool_count = 0; ++ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) ++ { ++ vkd3d_vk_descriptor_pool_array_init(&allocator->descriptor_pools[i]); ++ allocator->vk_pool_sizes[i] = min(VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE, device->vk_pool_limits[i]); ++ } + + allocator->views = NULL; + allocator->views_size = 0; +@@ -2749,7 +2860,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li + } + + vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, +- layout->vk_layout, variable_binding_size, unbounded_offset != UINT_MAX); ++ layout->descriptor_type, layout->descriptor_count + variable_binding_size, layout->vk_layout, ++ variable_binding_size, unbounded_offset != UINT_MAX); + bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set; + } + +@@ -2805,15 +2917,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des + break; + } + +- if (range->descriptor_count == UINT_MAX) +- { +- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; +- vk_descriptor_write->dstBinding = 0; +- } +- else +- { +- vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; +- } ++ vk_descriptor_write->dstSet = vk_descriptor_sets[range->image_set]; ++ vk_descriptor_write->dstBinding = use_array ? 
range->image_binding : range->image_binding + index; + + vk_image_info->sampler = VK_NULL_HANDLE; + vk_image_info->imageView = u.view->v.u.vk_image_view; +@@ -2934,10 +3039,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list + } + + static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write, +- const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set, ++ const struct d3d12_root_parameter *root_parameter, const VkDescriptorSet *vk_descriptor_sets, + VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info) + { + const struct d3d12_root_descriptor *root_descriptor; ++ VkDescriptorSet vk_descriptor_set; + + switch (root_parameter->parameter_type) + { +@@ -2956,6 +3062,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v + } + + root_descriptor = &root_parameter->u.descriptor; ++ vk_descriptor_set = vk_descriptor_sets ? vk_descriptor_sets[root_descriptor->set] : VK_NULL_HANDLE; + + vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_write->pNext = NULL; +@@ -3011,7 +3118,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list + } + + if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], +- root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) ++ root_parameter, bindings->descriptor_sets, vk_buffer_view, vk_buffer_info)) + continue; + + ++descriptor_count; +@@ -3039,8 +3146,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma + uav_counter_count = state->uav_counters.binding_count; + if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes)))) + return; +- if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set( +- list->allocator, state->uav_counters.vk_set_layout, 0, false))) ++ if (!(vk_descriptor_set = 
d3d12_command_allocator_allocate_descriptor_set(list->allocator, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_counter_count, state->uav_counters.vk_set_layout, 0, false))) + goto done; + + for (i = 0; i < uav_counter_count; ++i) +@@ -4612,8 +4719,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + + if (vk_info->KHR_push_descriptor) + { +- vk_write_descriptor_set_from_root_descriptor(&descriptor_write, +- root_parameter, VK_NULL_HANDLE, NULL, &buffer_info); ++ vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, NULL, &buffer_info); + VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, + root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); + } +@@ -4621,7 +4727,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + { + d3d12_command_list_prepare_descriptors(list, bind_point); + vk_write_descriptor_set_from_root_descriptor(&descriptor_write, +- root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); ++ root_parameter, bindings->descriptor_sets, NULL, &buffer_info); + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); + + VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); +@@ -4685,8 +4791,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + + if (vk_info->KHR_push_descriptor) + { +- vk_write_descriptor_set_from_root_descriptor(&descriptor_write, +- root_parameter, VK_NULL_HANDLE, &vk_buffer_view, NULL); ++ vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, &vk_buffer_view, NULL); + VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, + root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); + } +@@ -4694,7 +4799,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + { + d3d12_command_list_prepare_descriptors(list, bind_point); + 
vk_write_descriptor_set_from_root_descriptor(&descriptor_write, +- root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); ++ root_parameter, bindings->descriptor_sets, &vk_buffer_view, NULL); + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); + + VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); +@@ -5371,8 +5476,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, + view->info.texture.vk_view_type, view->format->type, &pipeline); + } + +- if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( +- list->allocator, pipeline.vk_set_layout, 0, false))) ++ if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(list->allocator, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, 1, pipeline.vk_set_layout, 0, false))) + { + ERR("Failed to allocate descriptor set.\n"); + return; +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index e92373a36fa..7b491805a72 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des + uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 
3 : 2; + } + +- limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, +- properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); +- limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, +- properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); +- limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, +- properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); +- limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, +- properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); +- limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, +- properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); ++ limits->uniform_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, ++ properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision), ++ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); ++ limits->sampled_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSampledImages, ++ properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision), ++ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); ++ limits->storage_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, ++ properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision), ++ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); ++ limits->storage_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageImages, ++ properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision), ++ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); ++ limits->sampler_max_descriptors = 
min(min(properties->maxDescriptorSetUpdateAfterBindSamplers, ++ properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision), ++ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); + limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); + } + +@@ -2677,39 +2682,16 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) + static void device_init_descriptor_pool_sizes(struct d3d12_device *device) + { + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; +- VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; ++ unsigned int *pool_sizes = device->vk_pool_limits; + +- if (device->use_vk_heaps) +- { +- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; +- pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, +- VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); +- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; +- pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; +- pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; +- pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); +- device->vk_pool_count = 3; +- return; +- } +- +- VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); +- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +- pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, ++ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_CBV] = min(limits->uniform_buffer_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; +- pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors, ++ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SRV] = min(limits->sampled_image_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +- pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount; +- pool_sizes[3].type 
= VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; +- pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors, ++ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_UAV] = min(limits->storage_image_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +- pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; +- pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount; +- pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; +- pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, ++ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER] = min(limits->sampler_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +- device->vk_pool_count = 6; + }; + + static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 2b0f81d3812..32f34479ea1 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -265,25 +265,6 @@ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHA + } + } + +-static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, +- bool is_buffer) +-{ +- switch (type) +- { +- case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: +- return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: +- return is_buffer ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; +- case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: +- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; +- case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: +- return VK_DESCRIPTOR_TYPE_SAMPLER; +- default: +- FIXME("Unhandled descriptor range type type %#x.\n", type); +- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; +- } +-} +- + static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( + D3D12_DESCRIPTOR_RANGE_TYPE type) + { +@@ -717,6 +698,8 @@ struct vk_binding_array + VkDescriptorSetLayoutBinding *bindings; + size_t capacity, count; + ++ enum vkd3d_shader_descriptor_type descriptor_type; ++ unsigned int descriptor_set; + unsigned int table_index; + unsigned int unbounded_offset; + VkDescriptorSetLayoutCreateFlags flags; +@@ -754,14 +737,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array, + return true; + } + ++static void vk_binding_array_make_unbound(struct vk_binding_array *array, ++ unsigned int offset, unsigned int table_index) ++{ ++ array->unbounded_offset = offset; ++ array->table_index = table_index; ++} ++ + struct vkd3d_descriptor_set_context + { + struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; ++ struct vk_binding_array *current_binding_array[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + unsigned int table_index; +- unsigned int unbounded_offset; + unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; ++ ++ struct vk_binding_array *push_descriptor_set; ++ bool push_descriptor; + }; + + static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) +@@ -786,46 +779,66 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns + return true; + } + +-static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( +- struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) ++static struct 
vk_binding_array *d3d12_root_signature_append_vk_binding_array( ++ struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, ++ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) + { ++ struct vk_binding_array *array; ++ unsigned int set; ++ + if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) + return NULL; + +- return &context->vk_bindings[root_signature->vk_set_count]; ++ set = root_signature->vk_set_count++; ++ array = &context->vk_bindings[set]; ++ array->descriptor_type = descriptor_type; ++ array->descriptor_set = set; ++ array->unbounded_offset = UINT_MAX; ++ array->flags = flags; ++ ++ return array; + } + +-static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, +- VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) ++static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type( ++ struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, ++ struct vkd3d_descriptor_set_context *context) + { +- struct vk_binding_array *array; ++ struct vk_binding_array *array, **current; + +- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) +- return; ++ if (context->push_descriptor) ++ { ++ if (!context->push_descriptor_set) ++ context->push_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature, ++ descriptor_type, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context); + +- array->table_index = context->table_index; +- array->unbounded_offset = context->unbounded_offset; +- array->flags = flags; ++ return context->push_descriptor_set; ++ } + +- ++root_signature->vk_set_count; ++ current = context->current_binding_array; ++ if (!(array = current[descriptor_type])) ++ { ++ array = d3d12_root_signature_append_vk_binding_array(root_signature, descriptor_type, 0, 
context); ++ current[descriptor_type] = array; ++ } ++ ++ return array; + } + + static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, +- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, +- unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, +- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, +- const VkSampler *immutable_sampler, unsigned int *binding_idx) ++ struct vk_binding_array *array, enum vkd3d_shader_descriptor_type descriptor_type, ++ unsigned int register_space, unsigned int register_idx, bool buffer_descriptor, ++ enum vkd3d_shader_visibility shader_visibility, unsigned int descriptor_count, ++ struct vkd3d_descriptor_set_context *context, const VkSampler *immutable_sampler) + { + struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets + ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; + struct vkd3d_shader_resource_binding *mapping; +- struct vk_binding_array *array; ++ VkDescriptorType vk_descriptor_type; + unsigned int idx; + +- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) +- || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], +- vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, +- stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) ++ vk_descriptor_type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor); ++ if (!vk_binding_array_add_binding(array, vk_descriptor_type, descriptor_count, ++ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx)) + return E_OUTOFMEMORY; + + mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; +@@ -834,7 +847,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct 
d3d12_root_signatur + mapping->register_index = register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; +- mapping->binding.set = root_signature->vk_set_count; ++ mapping->binding.set = array->descriptor_set; + mapping->binding.binding = idx; + mapping->binding.count = descriptor_count; + if (offset) +@@ -843,12 +856,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur + offset->dynamic_offset_index = ~0u; + } + +- if (context->unbounded_offset != UINT_MAX) +- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); +- +- if (binding_idx) +- *binding_idx = idx; +- + return S_OK; + } + +@@ -911,7 +918,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro + } + + static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, +- const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, ++ struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, + unsigned int vk_binding_array_count, unsigned int bindings_per_range, + struct vkd3d_descriptor_set_context *context) + { +@@ -919,34 +926,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r + bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; + enum vkd3d_shader_descriptor_type descriptor_type = range->type; + unsigned int i, register_space = range->register_space; ++ struct vk_binding_array *array; + HRESULT hr; + +- if (range->descriptor_count == UINT_MAX) +- context->unbounded_offset = range->offset; ++ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) ++ return E_OUTOFMEMORY; + ++ range->set = array->descriptor_set - root_signature->main_set; ++ range->binding = array->count; + for (i = 0; i < bindings_per_range; 
++i) + { +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, + register_space, range->base_register_idx + i, is_buffer, shader_visibility, +- vk_binding_array_count, context, NULL, NULL))) ++ vk_binding_array_count, context, NULL))) + return hr; + } + +- if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ if (range->descriptor_count == UINT_MAX) + { +- context->unbounded_offset = UINT_MAX; +- return S_OK; ++ vk_binding_array_make_unbound(array, range->offset, context->table_index); ++ context->current_binding_array[descriptor_type] = NULL; + } + ++ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) ++ return S_OK; ++ ++ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) ++ return E_OUTOFMEMORY; ++ ++ range->image_set = array->descriptor_set - root_signature->main_set; ++ range->image_binding = array->count; + for (i = 0; i < bindings_per_range; ++i) + { +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, + register_space, range->base_register_idx + i, false, shader_visibility, +- vk_binding_array_count, context, NULL, NULL))) ++ vk_binding_array_count, context, NULL))) + return hr; + } + +- context->unbounded_offset = UINT_MAX; ++ if (range->descriptor_count == UINT_MAX) ++ { ++ vk_binding_array_make_unbound(array, range->offset, context->table_index); ++ context->current_binding_array[descriptor_type] = NULL; ++ } + + return S_OK; + } +@@ -1199,16 +1221,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + + if (use_vk_heaps) + { +- /* set, binding and vk_binding_count are not used. 
*/ ++ /* set, binding, image_set, image_binding, and vk_binding_count are not used. */ + range->set = 0; + range->binding = 0; ++ range->image_set = 0; ++ range->image_binding = 0; + range->vk_binding_count = 0; + d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); + continue; + } + +- range->set = root_signature->vk_set_count - root_signature->main_set; +- + if (root_signature->use_descriptor_arrays) + { + if (j && range->type != table->ranges[j - 1].type) +@@ -1229,6 +1251,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + + range->set = base_range->set; + range->binding = base_range->binding; ++ range->image_set = base_range->image_set; ++ range->image_binding = base_range->image_binding; + range->vk_binding_count = base_range->vk_binding_count - rel_offset; + d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range, + rel_offset, shader_visibility, context); +@@ -1251,8 +1275,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + bindings_per_range = range->descriptor_count; + } + +- range->binding = context->vk_bindings[root_signature->vk_set_count].count; +- + if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, + p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) + return hr; +@@ -1266,7 +1288,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) + { +- unsigned int binding, i; ++ enum vkd3d_shader_descriptor_type descriptor_type; ++ struct vk_binding_array *array; ++ unsigned int i; + HRESULT hr; + + root_signature->push_descriptor_mask = 0; +@@ -1281,14 +1305,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct 
d3d12_root_sign + + root_signature->push_descriptor_mask |= 1u << i; + +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, +- vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), ++ descriptor_type = vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType); ++ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) ++ return E_OUTOFMEMORY; ++ ++ root_signature->parameters[i].parameter_type = p->ParameterType; ++ root_signature->parameters[i].u.descriptor.set = array->descriptor_set; ++ root_signature->parameters[i].u.descriptor.binding = array->count; ++ ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, +- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) ++ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL))) + return hr; + +- root_signature->parameters[i].parameter_type = p->ParameterType; +- root_signature->parameters[i].u.descriptor.binding = binding; + } + + return S_OK; +@@ -1298,10 +1327,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, + struct vkd3d_descriptor_set_context *context) + { ++ struct vk_binding_array *array; + unsigned int i; + HRESULT hr; + + VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); ++ ++ if (!desc->NumStaticSamplers) ++ return S_OK; ++ ++ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, context))) ++ return E_OUTOFMEMORY; ++ + for (i = 0; i < desc->NumStaticSamplers; ++i) + { + const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; +@@ -1309,16 +1347,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + if 
(FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) + return hr; + +- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, + vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, +- &root_signature->static_samplers[i], NULL))) ++ &root_signature->static_samplers[i]))) + return hr; + } + +- if (device->use_vk_heaps) +- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); +- + return S_OK; + } + +@@ -1450,29 +1485,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, + return S_OK; + } + ++static HRESULT d3d12_descriptor_set_layout_init(struct d3d12_descriptor_set_layout *layout, ++ struct d3d12_device *device, const struct vk_binding_array *array) ++{ ++ unsigned int descriptor_count; ++ bool unbounded; ++ HRESULT hr; ++ size_t i; ++ ++ descriptor_count = array->unbounded_offset; ++ if (!(unbounded = descriptor_count != UINT_MAX)) ++ { ++ for (i = 0, descriptor_count = 0; i < array->count; ++i) ++ { ++ descriptor_count += array->bindings[i].descriptorCount; ++ } ++ } ++ ++ if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, array->flags, ++ array->count, unbounded, array->bindings, &layout->vk_layout))) ++ return hr; ++ layout->descriptor_type = array->descriptor_type; ++ layout->descriptor_count = descriptor_count; ++ layout->unbounded_offset = array->unbounded_offset; ++ layout->table_index = array->table_index; ++ ++ return S_OK; ++} ++ + static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, + struct vkd3d_descriptor_set_context *context) + { + unsigned int i; + HRESULT hr; + +- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); +- + if (!vkd3d_validate_descriptor_set_count(root_signature->device, 
root_signature->vk_set_count)) + return E_INVALIDARG; + + for (i = 0; i < root_signature->vk_set_count; ++i) + { +- struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; +- struct vk_binding_array *array = &context->vk_bindings[i]; ++ const struct vk_binding_array *array = &context->vk_bindings[i]; + + VKD3D_ASSERT(array->count); + +- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, +- array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) ++ if (FAILED(hr = d3d12_descriptor_set_layout_init(&root_signature->descriptor_set_layouts[i], ++ root_signature->device, array))) + return hr; +- layout->unbounded_offset = array->unbounded_offset; +- layout->table_index = array->table_index; + } + + return S_OK; +@@ -1518,7 +1576,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + HRESULT hr; + + memset(&context, 0, sizeof(context)); +- context.unbounded_offset = UINT_MAX; + + root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; + root_signature->refcount = 1; +@@ -1580,17 +1637,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + sizeof(*root_signature->static_samplers)))) + goto fail; + ++ context.push_descriptor = vk_info->KHR_push_descriptor; + if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) + goto fail; +- +- /* We use KHR_push_descriptor for root descriptor parameters. 
*/ +- if (vk_info->KHR_push_descriptor) +- { +- d3d12_root_signature_append_vk_binding_array(root_signature, +- VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); +- } +- +- root_signature->main_set = root_signature->vk_set_count; ++ root_signature->main_set = !!context.push_descriptor_set; ++ context.push_descriptor = false; + + if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, + root_signature->push_constant_ranges, &root_signature->push_constant_range_count))) +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index 97a99782d6a..8488d5db3fa 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -58,12 +58,17 @@ + #define VKD3D_MAX_VK_SYNC_OBJECTS 4u + #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u + #define VKD3D_MAX_DESCRIPTOR_SETS 64u ++/* Direct3D 12 binding tier 3 has a limit of "1,000,000+" CBVs, SRVs and UAVs. ++ * I am not sure what the "+" is supposed to mean: it probably hints that ++ * implementations may have an even higher limit, but that's pretty obvious, ++ * that table is for guaranteed minimum limits. */ ++#define VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS 1000000u + /* D3D12 binding tier 3 has a limit of 2048 samplers. */ + #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u +-/* The main limitation here is the simple descriptor pool recycling scheme +- * requiring each pool to contain all descriptor types used by vkd3d. Limit +- * this number to prevent excessive pool memory use. 
*/ + #define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u) ++#define VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE 1024u ++ ++#define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) + + extern uint64_t object_global_serial_id; + +@@ -770,6 +775,25 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE + void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, + struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc); + ++static inline VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, ++ bool is_buffer) ++{ ++ switch (type) ++ { ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: ++ return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: ++ return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: ++ return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; ++ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: ++ return VK_DESCRIPTOR_TYPE_SAMPLER; ++ default: ++ FIXME("Unhandled descriptor range type type %#x.\n", type); ++ return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; ++ } ++} ++ + enum vkd3d_vk_descriptor_set_index + { + VKD3D_SET_INDEX_SAMPLER, +@@ -899,6 +923,8 @@ struct d3d12_root_descriptor_table_range + unsigned int vk_binding_count; + uint32_t set; + uint32_t binding; ++ uint32_t image_set; ++ uint32_t image_binding; + + enum vkd3d_shader_descriptor_type type; + uint32_t descriptor_magic; +@@ -920,6 +946,7 @@ struct d3d12_root_constant + + struct d3d12_root_descriptor + { ++ uint32_t set; + uint32_t binding; + }; + +@@ -936,7 +963,9 @@ struct d3d12_root_parameter + + struct d3d12_descriptor_set_layout + { ++ enum vkd3d_shader_descriptor_type descriptor_type; + VkDescriptorSetLayout vk_layout; ++ unsigned int descriptor_count; + unsigned int unbounded_offset; + unsigned int 
table_index; + }; +@@ -1135,6 +1164,18 @@ struct vkd3d_buffer + VkDeviceMemory vk_memory; + }; + ++struct vkd3d_vk_descriptor_pool ++{ ++ unsigned int descriptor_count; ++ VkDescriptorPool vk_pool; ++}; ++ ++struct vkd3d_vk_descriptor_pool_array ++{ ++ struct vkd3d_vk_descriptor_pool *pools; ++ size_t capacity, count; ++}; ++ + /* ID3D12CommandAllocator */ + struct d3d12_command_allocator + { +@@ -1146,11 +1187,9 @@ struct d3d12_command_allocator + + VkCommandPool vk_command_pool; + +- VkDescriptorPool vk_descriptor_pool; ++ VkDescriptorPool vk_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + +- VkDescriptorPool *free_descriptor_pools; +- size_t free_descriptor_pools_size; +- size_t free_descriptor_pool_count; ++ struct vkd3d_vk_descriptor_pool_array free_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + + VkRenderPass *passes; + size_t passes_size; +@@ -1160,9 +1199,8 @@ struct d3d12_command_allocator + size_t framebuffers_size; + size_t framebuffer_count; + +- VkDescriptorPool *descriptor_pools; +- size_t descriptor_pools_size; +- size_t descriptor_pool_count; ++ struct vkd3d_vk_descriptor_pool_array descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; ++ unsigned int vk_pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + + struct vkd3d_view **views; + size_t views_size; +@@ -1516,8 +1554,6 @@ struct vkd3d_desc_object_cache + size_t size; + }; + +-#define VKD3D_DESCRIPTOR_POOL_COUNT 6 +- + /* ID3D12Device */ + struct d3d12_device + { +@@ -1536,8 +1572,7 @@ struct d3d12_device + struct vkd3d_desc_object_cache view_desc_cache; + struct vkd3d_desc_object_cache cbuffer_desc_cache; + +- VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; +- unsigned int vk_pool_count; ++ unsigned int vk_pool_limits[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; + struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; + bool use_vk_heaps; + +-- +2.45.2 + diff --git 
a/patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch deleted file mode 100644 index 8d031ee5..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch +++ /dev/null @@ -1,1105 +0,0 @@ -From 71c65e41df0caa78d77dc42672b21f62f4d8b3c0 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 3 Dec 2024 09:14:28 +1100 -Subject: [PATCH] Updated vkd3d to 39cbef9e018ee760ffd175fdd6729e470529fb77. - ---- - libs/vkd3d/include/vkd3d_shader.h | 194 +++++++ - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 + - libs/vkd3d/libs/vkd3d-shader/ir.c | 498 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 34 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 99 ++-- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - 6 files changed, 746 insertions(+), 82 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index cb561d7f079..af55d63a5c8 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -476,6 +476,109 @@ enum vkd3d_shader_binding_flag - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), - }; - -+/** -+ * The factor used to interpolate the fragment output colour with fog. -+ * -+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for specification of the -+ * interpolation factor as defined here. -+ * -+ * The following variables may be used to determine the interpolation factor: -+ * -+ * c = The fog coordinate value output from the vertex shader. This is an -+ * inter-stage varying with the semantic name "FOG" and semantic index 0. -+ * It may be modified by VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE. -+ * E = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_END. -+ * k = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE. 
-+ * -+ * \since 1.15 -+ */ -+enum vkd3d_shader_fog_fragment_mode -+{ -+ /** -+ * No fog interpolation is applied; -+ * the output colour is passed through unmodified. -+ * Equivalently, the fog interpolation factor is 1. -+ */ -+ VKD3D_SHADER_FOG_FRAGMENT_NONE = 0x0, -+ /** -+ * The fog interpolation factor is 2^-(k * c). -+ * -+ * In order to implement traditional exponential fog, as present in -+ * Direct3D and OpenGL, i.e. -+ * -+ * e^-(density * c) -+ * -+ * set -+ * -+ * k = density * log₂(e) -+ */ -+ VKD3D_SHADER_FOG_FRAGMENT_EXP = 0x1, -+ /** -+ * The fog interpolation factor is 2^-((k * c)²). -+ * -+ * In order to implement traditional square-exponential fog, as present in -+ * Direct3D and OpenGL, i.e. -+ * -+ * e^-((density * c)²) -+ * -+ * set -+ * -+ * k = density * √log₂(e) -+ */ -+ VKD3D_SHADER_FOG_FRAGMENT_EXP2 = 0x2, -+ /** -+ * The fog interpolation factor is (E - c) * k. -+ * -+ * In order to implement traditional linear fog, as present in Direct3D and -+ * OpenGL, i.e. -+ * -+ * (end - c) / (end - start) -+ * -+ * set -+ * -+ * E = end -+ * k = 1 / (end - start) -+ */ -+ VKD3D_SHADER_FOG_FRAGMENT_LINEAR = 0x3, -+}; -+ -+/** -+ * The source of the fog varying output by a pre-rasterization shader. -+ * The fog varying is defined as the output varying with the semantic name "FOG" -+ * and semantic index 0. -+ * -+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE for further documentation of this -+ * parameter. -+ * -+ * \since 1.15 -+ */ -+enum vkd3d_shader_fog_source -+{ -+ /** -+ * The source shader is not modified. That is, the fog varying in the target -+ * shader is the original fog varying if and only if present. -+ */ -+ VKD3D_SHADER_FOG_SOURCE_FOG = 0x0, -+ /** -+ * If the source shader has a fog varying, it is not modified. -+ * Otherwise, if the source shader outputs a varying with semantic name -+ * "COLOR" and semantic index 1 whose index includes a W component, -+ * said W component is output as fog varying. 
-+ * Otherwise, no fog varying is output. -+ */ -+ VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W = 0x1, -+ /** -+ * The fog source is the Z component of the position output by the vertex -+ * shader. -+ */ -+ VKD3D_SHADER_FOG_SOURCE_Z = 0x2, -+ /** -+ * The fog source is the W component of the position output by the vertex -+ * shader. -+ */ -+ VKD3D_SHADER_FOG_SOURCE_W = 0x3, -+}; -+ - /** - * The manner in which a parameter value is provided to the shader, used in - * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -@@ -739,6 +842,97 @@ enum vkd3d_shader_parameter_name - * \since 1.14 - */ - VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, -+ /** -+ * Fog mode used in fragment shaders. -+ * -+ * The value specified by this parameter must be a member of -+ * enum vkd3d_shader_fog_fragment_mode. -+ * -+ * If not VKD3D_SHADER_FOG_FRAGMENT_NONE, the pixel shader colour output at -+ * location 0 is linearly interpolated with the fog colour defined by -+ * VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR. The interpolation factor is -+ * defined according to the enumerant selected by this parameter. -+ * The interpolated value is then outputted instead of the original value at -+ * location 0. -+ * -+ * An interpolation factor of 0 specifies to use the fog colour; a factor of -+ * 1 specifies to use the original colour output. The interpolation factor -+ * is clamped to the [0, 1] range before interpolating. -+ * -+ * The default value is VKD3D_SHADER_FOG_FRAGMENT_NONE. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.15 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE, -+ /** -+ * Fog colour. -+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of -+ * fog. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. 
-+ * -+ * The default value is transparent black, i.e. the vector {0, 0, 0, 0}. -+ * -+ * \since 1.15 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, -+ /** -+ * End coordinate for linear fog. -+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of -+ * fog. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * The default value is 1.0. -+ * -+ * \since 1.15 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FOG_END, -+ /** -+ * Scale value for fog. -+ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of -+ * fog. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * The default value is 1.0. -+ * -+ * \since 1.15 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, -+ /** -+ * Fog source. The value specified by this parameter must be a member of -+ * enum vkd3d_shader_fog_source. -+ * -+ * This parameter replaces or suppletes the fog varying output by a -+ * pre-rasterization shader. The fog varying is defined as the output -+ * varying with the semantic name "FOG" and semantic index 0. -+ * -+ * Together with other fog parameters, this parameter can be used to -+ * implement fixed function fog, as present in Direct3D versions up to 9, -+ * if the target environment does not support fog as part of its own -+ * fixed-function API (as Vulkan and core OpenGL). -+ * -+ * The default value is VKD3D_SHADER_FOG_SOURCE_FOG. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. 
-+ * -+ * \since 1.15 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index bda9bc72f56..7db658fb541 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -968,6 +968,8 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const - - if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) - sm1->p.program->has_point_size = true; -+ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG) -+ sm1->p.program->has_fog = true; - } - - static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 0c06db9ff15..53b26dac76e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -1,5 +1,6 @@ - /* - * Copyright 2023 Conor McCarthy for CodeWeavers -+ * Copyright 2023-2024 Elizabeth Figura for CodeWeavers - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public -@@ -222,6 +223,14 @@ static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_ - src->reg.idx[0].offset = idx; - } - -+static void src_param_init_parameter_vec4(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -+{ -+ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); -+ src->reg.idx[0].offset = idx; -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+} -+ - static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) - { - vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2); -@@ -251,6 +260,14 @@ static void src_param_init_ssa_float(struct 
vkd3d_shader_src_param *src, unsigne - src->reg.idx[0].offset = idx; - } - -+static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); -+ src->reg.idx[0].offset = idx; -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+} -+ - static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) - { - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -@@ -306,6 +323,14 @@ static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigne - dst->reg.idx[0].offset = idx; - } - -+static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); -+ dst->reg.idx[0].offset = idx; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst->write_mask = VKD3DSP_WRITEMASK_ALL; -+} -+ - static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); -@@ -864,11 +889,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, - return VKD3D_OK; - } - -+static bool add_signature_element(struct shader_signature *signature, const char *semantic_name, -+ uint32_t semantic_index, uint32_t mask, uint32_t register_index, -+ enum vkd3d_shader_interpolation_mode interpolation_mode) -+{ -+ struct signature_element *new_elements, *e; -+ -+ if (!(new_elements = vkd3d_realloc(signature->elements, -+ (signature->element_count + 1) * sizeof(*signature->elements)))) -+ return false; -+ signature->elements = new_elements; -+ e = &signature->elements[signature->element_count++]; -+ memset(e, 0, sizeof(*e)); -+ e->semantic_name = vkd3d_strdup(semantic_name); -+ e->semantic_index = semantic_index; -+ e->sysval_semantic = VKD3D_SHADER_SV_NONE; -+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ 
e->register_count = 1; -+ e->mask = mask; -+ e->used_mask = mask; -+ e->register_index = register_index; -+ e->target_location = register_index; -+ e->interpolation_mode = interpolation_mode; -+ return true; -+} -+ - static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, - struct vsir_transformation_context *ctx) - { - struct shader_signature *signature = &program->output_signature; -- struct signature_element *new_elements, *e; -+ struct signature_element *e; - - if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) - return VKD3D_OK; -@@ -881,22 +931,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr - return VKD3D_OK; - } - -- if (!(new_elements = vkd3d_realloc(signature->elements, -- (signature->element_count + 1) * sizeof(*signature->elements)))) -+ if (!add_signature_element(signature, "COLOR", 0, VKD3DSP_WRITEMASK_ALL, SM1_COLOR_REGISTER_OFFSET, VKD3DSIM_NONE)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- signature->elements = new_elements; -- e = &signature->elements[signature->element_count++]; -- memset(e, 0, sizeof(*e)); -- e->semantic_name = vkd3d_strdup("COLOR"); -- e->sysval_semantic = VKD3D_SHADER_SV_NONE; -- e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -- e->register_count = 1; -- e->mask = VKD3DSP_WRITEMASK_ALL; -- e->used_mask = VKD3DSP_WRITEMASK_ALL; -- e->register_index = SM1_COLOR_REGISTER_OFFSET; -- e->target_location = SM1_COLOR_REGISTER_OFFSET; -- e->interpolation_mode = VKD3DSIM_NONE; -- - return VKD3D_OK; - } - -@@ -1051,6 +1087,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program - - e->target_location = map->input_register_index; - -+ TRACE("Mapping signature index %u (mask %#x) to target location %u (mask %#x).\n", -+ i, e->mask, map->input_register_index, map->input_mask); -+ - if ((input_mask & e->mask) == input_mask) - { - ++subset_varying_count; -@@ -1071,6 +1110,8 @@ static enum vkd3d_result 
vsir_program_remap_output_signature(struct vsir_program - } - else - { -+ TRACE("Marking signature index %u (mask %#x) as unused.\n", i, e->mask); -+ - e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; - } - -@@ -6685,6 +6726,423 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct shader_signature *signature = &program->input_signature; -+ uint32_t register_idx = 0; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; -+ -+ if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) -+ return VKD3D_OK; -+ -+ /* We could check the value and skip this if NONE, but chances are if a -+ * user specifies the fog fragment mode as a parameter, they'll want to -+ * enable it dynamically. Always specifying it (and hence always outputting -+ * it from the VS) avoids an extra VS variant. 
*/ -+ -+ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) -+ return VKD3D_OK; -+ -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ register_idx = max(register_idx, signature->elements[i].register_index + 1); -+ -+ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, -+ uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, -+ size_t *ret_pos, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ struct vkd3d_shader_location loc = ret->location; -+ uint32_t ssa_factor = program->ssa_count++; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ uint32_t ssa_temp, ssa_temp2; -+ -+ switch (mode) -+ { -+ case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: -+ /* We generate the following code: -+ * -+ * add sr0, FOG_END, -vFOG.x -+ * mul_sat srFACTOR, sr0, FOG_SCALE -+ */ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ *ret_pos = pos + 4; -+ -+ ssa_temp = program->ssa_count++; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_ADD, 1, 2); -+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); -+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VKD3D_DATA_FLOAT); -+ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); -+ ins->src[1].reg.idx[0].offset = fog_signature_idx; -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ ins->src[1].modifiers = VKD3DSPSM_NEG; -+ -+ 
vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); -+ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); -+ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; -+ src_param_init_ssa_float(&ins->src[0], ssa_temp); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); -+ break; -+ -+ case VKD3D_SHADER_FOG_FRAGMENT_EXP: -+ /* We generate the following code: -+ * -+ * mul sr0, FOG_SCALE, vFOG.x -+ * exp_sat srFACTOR, -sr0 -+ */ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ *ret_pos = pos + 4; -+ -+ ssa_temp = program->ssa_count++; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); -+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); -+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); -+ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); -+ ins->src[1].reg.idx[0].offset = fog_signature_idx; -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ -+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); -+ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); -+ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; -+ src_param_init_ssa_float(&ins->src[0], ssa_temp); -+ ins->src[0].modifiers = VKD3DSPSM_NEG; -+ break; -+ -+ case VKD3D_SHADER_FOG_FRAGMENT_EXP2: -+ /* We generate the following code: -+ * -+ * mul sr0, FOG_SCALE, vFOG.x -+ * mul sr1, sr0, sr0 -+ * exp_sat srFACTOR, -sr1 -+ */ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ *ret_pos = pos + 5; -+ -+ ssa_temp = program->ssa_count++; -+ ssa_temp2 = program->ssa_count++; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); 
-+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); -+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); -+ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); -+ ins->src[1].reg.idx[0].offset = fog_signature_idx; -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ -+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); -+ dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); -+ src_param_init_ssa_float(&ins->src[0], ssa_temp); -+ src_param_init_ssa_float(&ins->src[1], ssa_temp); -+ -+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); -+ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); -+ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; -+ src_param_init_ssa_float(&ins->src[0], ssa_temp2); -+ ins->src[0].modifiers = VKD3DSPSM_NEG; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ /* We generate the following code: -+ * -+ * add sr0, FRAG_COLOUR, -FOG_COLOUR -+ * mad oC0, sr0, srFACTOR, FOG_COLOUR -+ */ -+ -+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_ADD, 1, 2); -+ dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++); -+ src_param_init_temp_float4(&ins->src[0], colour_temp); -+ src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); -+ ins->src[1].modifiers = VKD3DSPSM_NEG; -+ -+ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MAD, 1, 3); -+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, colour_signature_idx, -+ program->output_signature.elements[colour_signature_idx].mask); -+ src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1); -+ src_param_init_ssa_float(&ins->src[1], ssa_factor); -+ src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result 
vsir_program_insert_fragment_fog(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; -+ uint32_t colour_signature_idx, fog_signature_idx, colour_temp; -+ const struct vkd3d_shader_parameter1 *mode_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ const struct signature_element *fog_element; -+ enum vkd3d_shader_fog_fragment_mode mode; -+ struct vkd3d_shader_instruction *ins; -+ size_t new_pos; -+ int ret; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; -+ -+ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)) -+ return VKD3D_OK; -+ -+ if (!(mode_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE))) -+ return VKD3D_OK; -+ -+ if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ mode = mode_parameter->u.immediate_constant.u.u32; -+ -+ if (mode == VKD3D_SHADER_FOG_FRAGMENT_NONE) -+ return VKD3D_OK; -+ -+ /* Should have been added by vsir_program_add_fog_input(). 
*/ -+ if (!(fog_element = vsir_signature_find_element_by_name(&program->input_signature, "FOG", 0))) -+ { -+ ERR("Fog input not found.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ fog_signature_idx = fog_element - program->input_signature.elements; -+ -+ /* We're going to be reading from the output, so we need to go -+ * through the whole shader and convert it to a temp. */ -+ colour_temp = program->temp_count++; -+ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx, -+ colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) -+ return ret; -+ i = new_pos; -+ continue; -+ } -+ -+ for (size_t j = 0; j < ins->dst_count; ++j) -+ { -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; -+ -+ /* Note we run after I/O normalization. */ -+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) -+ { -+ dst->reg.type = VKD3DSPR_TEMP; -+ dst->reg.idx[0].offset = colour_temp; -+ } -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct shader_signature *signature = &program->output_signature; -+ const struct vkd3d_shader_parameter1 *source_parameter; -+ uint32_t register_idx = 0; -+ -+ if (!is_pre_rasterization_shader(program->shader_version.type)) -+ return VKD3D_OK; -+ -+ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) -+ return VKD3D_OK; -+ -+ if (source_parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ enum vkd3d_shader_fog_source source = source_parameter->u.immediate_constant.u.u32; -+ -+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG) -+ return VKD3D_OK; -+ -+ if (source == 
VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W -+ && !vsir_signature_find_element_by_name(signature, "COLOR", 1)) -+ return VKD3D_OK; -+ } -+ -+ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) -+ return VKD3D_OK; -+ -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ register_idx = max(register_idx, signature->elements[i].register_index + 1); -+ -+ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp, -+ uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos) -+{ -+ const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ /* Write the fog output. */ -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); -+ src_param_init_temp_float4(&ins->src[0], temp); -+ if (source == VKD3D_SHADER_FOG_SOURCE_Z) -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); -+ else /* Position or specular W. */ -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ ++ins; -+ -+ /* Write the position or specular output. 
*/ -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), -+ source_signature_idx, e->mask); -+ src_param_init_temp_float4(&ins->src[0], temp); -+ ++ins; -+ -+ *ret_pos = pos + 2; -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; -+ const struct vkd3d_shader_parameter1 *source_parameter = NULL; -+ uint32_t fog_signature_idx, source_signature_idx, temp; -+ static const struct vkd3d_shader_location no_loc; -+ enum vkd3d_shader_fog_source source; -+ const struct signature_element *e; -+ -+ if (!is_pre_rasterization_shader(program->shader_version.type)) -+ return VKD3D_OK; -+ -+ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) -+ return VKD3D_OK; -+ -+ if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported fog source parameter type %#x.", source_parameter->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid fog source parameter data type %#x.", source_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ source = source_parameter->u.immediate_constant.u.u32; -+ -+ TRACE("Fog source %#x.\n", source); -+ -+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG) -+ return VKD3D_OK; -+ -+ if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W) -+ { -+ if (program->has_fog || !(e = vsir_signature_find_element_by_name(&program->output_signature, "COLOR", 1))) -+ return VKD3D_OK; -+ 
source_signature_idx = e - program->output_signature.elements; -+ } -+ else -+ { -+ if (!vsir_signature_find_sysval(&program->output_signature, -+ VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx)) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, -+ VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ } -+ -+ if (!(e = vsir_signature_find_element_by_name(&program->output_signature, "FOG", 0))) -+ { -+ ERR("Fog output not found.\n"); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ fog_signature_idx = e - program->output_signature.elements; -+ -+ temp = program->temp_count++; -+ -+ /* Insert a fog write before each ret, and convert either specular or -+ * position output to a temp. */ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ size_t new_pos; -+ int ret; -+ -+ if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp, -+ fog_signature_idx, source_signature_idx, &new_pos)) < 0) -+ return ret; -+ i = new_pos; -+ continue; -+ } -+ -+ for (size_t j = 0; j < ins->dst_count; ++j) -+ { -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; -+ -+ /* Note we run after I/O normalization. */ -+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == source_signature_idx) -+ { -+ dst->reg.type = VKD3DSPR_TEMP; -+ dst->reg.idx[0].offset = temp; -+ } -+ } -+ } -+ -+ program->has_fog = true; -+ -+ return VKD3D_OK; -+} -+ - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -8769,6 +9227,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin - if (program->shader_version.major <= 2) - vsir_transform(&ctx, vsir_program_add_diffuse_output); - -+ /* For vsir_program_insert_fragment_fog(). 
*/ -+ vsir_transform(&ctx, vsir_program_add_fog_input); -+ -+ /* For vsir_program_insert_vertex_fog(). */ -+ vsir_transform(&ctx, vsir_program_add_fog_output); -+ - return ctx.result; - } - -@@ -8823,6 +9287,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - vsir_transform(&ctx, vsir_program_insert_point_size); - vsir_transform(&ctx, vsir_program_insert_point_size_clamp); - vsir_transform(&ctx, vsir_program_insert_point_coord); -+ vsir_transform(&ctx, vsir_program_insert_fragment_fog); -+ vsir_transform(&ctx, vsir_program_insert_vertex_fog); - - if (TRACE_ON()) - vsir_program_trace(program); -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 29f51088728..9a3c3ed885e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -41,6 +41,8 @@ struct msl_generator - const char *prefix; - bool failed; - -+ bool write_depth; -+ - const struct vkd3d_shader_interface_info *interface_info; - const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; - }; -@@ -153,6 +155,14 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, - msl_print_register_datatype(buffer, gen, reg->data_type); - break; - -+ case VKD3DSPR_DEPTHOUT: -+ if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled depth output in shader type #%x.", -+ gen->program->shader_version.type); -+ vkd3d_string_buffer_printf(buffer, "o_depth"); -+ break; -+ - case VKD3DSPR_IMMCONST: - switch (reg->dimension) - { -@@ -335,7 +345,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, - msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); - - msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); -- msl_print_write_mask(msl_dst->mask, write_mask); -+ if (vsir_dst->reg.dimension == VSIR_DIMENSION_VEC4) -+ 
msl_print_write_mask(msl_dst->mask, write_mask); - - return write_mask; - } -@@ -827,6 +838,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) - { - e = &signature->elements[i]; - -+ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) -+ { -+ gen->write_depth = true; -+ msl_print_indent(gen->buffer, 1); -+ vkd3d_string_buffer_printf(buffer, "float shader_out_depth [[depth(any)]];\n"); -+ continue; -+ } -+ - if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) - continue; - -@@ -936,6 +955,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) - { - e = &signature->elements[i]; - -+ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) -+ { -+ vkd3d_string_buffer_printf(buffer, " output.shader_out_depth = shader_out_depth;\n"); -+ continue; -+ } -+ - if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) - continue; - -@@ -995,9 +1020,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) - vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); - vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); - -+ if (gen->write_depth) -+ vkd3d_string_buffer_printf(gen->buffer, " float shader_out_depth;\n"); -+ - msl_generate_entrypoint_prologue(gen); - - vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); -+ if (gen->write_depth) -+ vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); - if (gen->descriptor_info->descriptor_count) - vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); - vkd3d_string_buffer_printf(gen->buffer, ");\n"); -@@ -1035,6 +1065,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader - "void %s_main(thread vkd3d_vec4 *v, " - "thread vkd3d_vec4 *o", - gen->prefix); -+ if (gen->write_depth) -+ vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); - if (gen->descriptor_info->descriptor_count) - 
vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); - vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 005b40a9d1f..649f92a57f3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2406,6 +2406,7 @@ struct vkd3d_hull_shader_variables - struct ssa_register_info - { - enum vkd3d_data_type data_type; -+ uint8_t write_mask; - uint32_t id; - }; - -@@ -3315,13 +3316,19 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, - static const struct vkd3d_spec_constant_info - { - enum vkd3d_shader_parameter_name name; -- uint32_t default_value; -+ union -+ { -+ uint32_t u; -+ float f; -+ } default_value; - const char *debug_name; - } - vkd3d_shader_parameters[] = - { -- {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, -- {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, -+ {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, {.u = 1}, "sample_count"}, -+ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, {.f = 0.0f}, "alpha_test_ref"}, -+ {VKD3D_SHADER_PARAMETER_NAME_FOG_END, {.f = 1.0f}, "fog_end"}, -+ {VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, {.f = 1.0f}, "fog_scale"}, - }; - - static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) -@@ -3382,7 +3389,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile - const struct vkd3d_spec_constant_info *info; - - info = get_spec_constant_info(name); -- default_value = info ? info->default_value : 0; -+ default_value = info ? 
info->default_value.u : 0; - - scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); - vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); -@@ -3573,6 +3580,24 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, - register_info->is_aggregate = false; - return true; - } -+ else if (reg->type == VKD3DSPR_SSA) -+ { -+ const struct ssa_register_info *ssa = &compiler->ssa_register_info[reg->idx[0].offset]; -+ -+ if (!ssa->id) -+ { -+ /* Should only be from a missing instruction implementation. */ -+ VKD3D_ASSERT(compiler->failed); -+ return 0; -+ } -+ -+ memset(register_info, 0, sizeof(*register_info)); -+ register_info->id = ssa->id; -+ register_info->storage_class = SpvStorageClassMax; -+ register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type); -+ register_info->write_mask = ssa->write_mask; -+ return true; -+ } - - vkd3d_symbol_make_register(®_symbol, reg); - if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) -@@ -4180,67 +4205,14 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil - return const_id; - } - --static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, -- const struct vkd3d_shader_register *reg) --{ -- VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); -- VKD3D_ASSERT(reg->idx_count == 1); -- return &compiler->ssa_register_info[reg->idx[0].offset]; --} -- - static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler, -- const struct vkd3d_shader_register *reg, uint32_t val_id) -+ const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) - { - unsigned int i = reg->idx[0].offset; - VKD3D_ASSERT(i < compiler->ssa_register_count); - compiler->ssa_register_info[i].data_type = reg->data_type; - compiler->ssa_register_info[i].id = val_id; --} -- 
--static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler, -- const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type, -- uint32_t swizzle) --{ -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- enum vkd3d_shader_component_type reg_component_type; -- const struct ssa_register_info *ssa; -- unsigned int component_idx; -- uint32_t type_id, val_id; -- -- ssa = spirv_compiler_get_ssa_register_info(compiler, reg); -- val_id = ssa->id; -- if (!val_id) -- { -- /* Should only be from a missing instruction implementation. */ -- VKD3D_ASSERT(compiler->failed); -- return 0; -- } -- VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); -- -- reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); -- -- if (reg->dimension == VSIR_DIMENSION_SCALAR) -- { -- if (component_type != reg_component_type) -- { -- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); -- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); -- } -- -- return val_id; -- } -- -- if (component_type != reg_component_type) -- { -- /* Required for resource loads with sampled type int, because DXIL has no signedness. -- * Only 128-bit vector sizes are used. 
*/ -- type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); -- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); -- } -- -- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); -- component_idx = vsir_swizzle_get_component(swizzle, 0); -- return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); -+ compiler->ssa_register_info[i].write_mask = write_mask; - } - - static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, -@@ -4266,9 +4238,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - component_count = vsir_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); - -- if (reg->type == VKD3DSPR_SSA) -- return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle); -- - if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) - { - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); -@@ -4293,9 +4262,9 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - type_id = vkd3d_spirv_get_type_id(builder, - reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask)); - val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); -+ swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; - } - -- swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; - val_id = spirv_compiler_emit_swizzle(compiler, - val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); - -@@ -4496,7 +4465,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - - if (reg->type == VKD3DSPR_SSA) - { -- spirv_compiler_set_ssa_register_info(compiler, reg, val_id); -+ spirv_compiler_set_ssa_register_info(compiler, reg, write_mask, val_id); - return; - } - -@@ -7431,7 +7400,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, - - general_implementation: - write_mask = dst->write_mask; -- if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) -+ if (data_type_is_64_bit(src->reg.data_type) && !data_type_is_64_bit(dst->reg.data_type)) - write_mask = vsir_write_mask_64_from_32(write_mask); - else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) - write_mask = vsir_write_mask_32_from_64(write_mask); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index ad04972b3fb..55b28cdd875 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1428,6 +1428,7 @@ struct vsir_program - bool use_vocp; - bool has_point_size; - bool has_point_coord; -+ bool has_fog; - uint8_t diffuse_written_mask; - enum vsir_control_flow_type cf_type; - enum vsir_normalisation_level normalisation_level; --- -2.45.2 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-36fda8e28ca31517ae051b2e46b00d71a23.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-36fda8e28ca31517ae051b2e46b00d71a23.patch deleted file mode 100644 index 15f9b92a..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-36fda8e28ca31517ae051b2e46b00d71a23.patch +++ /dev/null @@ -1,1803 +0,0 @@ -From 242ae04e65c6b50dbd8506852dac2b347b3b3a87 Mon Sep 17 00:00:00 2001 -From: Alistair 
Leslie-Hughes -Date: Wed, 4 Dec 2024 07:19:12 +1100 -Subject: [PATCH] Updated vkd3d to 36fda8e28ca31517ae051b2e46b00d71a23c01a8. - ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 34 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 205 ++++++------ - libs/vkd3d/libs/vkd3d-shader/glsl.c | 7 - - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 156 +++++----- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 2 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 134 +++++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 292 +++++++++--------- - libs/vkd3d/libs/vkd3d-shader/msl.c | 65 +++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 166 ++++------ - .../libs/vkd3d-shader/vkd3d_shader_private.h | 2 + - 10 files changed, 551 insertions(+), 512 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 7c5444f63a3..8c96befadea 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -49,7 +49,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_BFREV ] = "bfrev", - [VKD3DSIH_BRANCH ] = "branch", - [VKD3DSIH_BREAK ] = "break", -- [VKD3DSIH_BREAKC ] = "breakc", -+ [VKD3DSIH_BREAKC ] = "break", - [VKD3DSIH_BREAKP ] = "breakp", - [VKD3DSIH_BUFINFO ] = "bufinfo", - [VKD3DSIH_CALL ] = "call", -@@ -183,7 +183,7 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_IDIV ] = "idiv", - [VKD3DSIH_IEQ ] = "ieq", - [VKD3DSIH_IF ] = "if", -- [VKD3DSIH_IFC ] = "ifc", -+ [VKD3DSIH_IFC ] = "if", - [VKD3DSIH_IGE ] = "ige", - [VKD3DSIH_ILT ] = "ilt", - [VKD3DSIH_IMAD ] = "imad", -@@ -815,7 +815,7 @@ static void shader_print_dcl_usage(struct vkd3d_d3d_asm_compiler *compiler, - usage = "tessfactor"; - break; - case VKD3D_DECL_USAGE_POSITIONT: -- usage = "positionT"; -+ usage = "positiont"; - indexed = true; - break; - case VKD3D_DECL_USAGE_FOG: -@@ -2547,6 +2547,33 @@ static void trace_signature(const struct shader_signature *signature, const char - vkd3d_string_buffer_cleanup(&buffer); - } - -+static void 
trace_io_declarations(const struct vsir_program *program) -+{ -+ struct vkd3d_string_buffer buffer; -+ bool empty = true; -+ unsigned int i; -+ -+ vkd3d_string_buffer_init(&buffer); -+ -+ vkd3d_string_buffer_printf(&buffer, "Input/output declarations:"); -+ -+ for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) -+ { -+ if (bitmap_is_set(program->io_dcls, i)) -+ { -+ empty = false; -+ vkd3d_string_buffer_printf(&buffer, " %u", i); -+ } -+ } -+ -+ if (empty) -+ vkd3d_string_buffer_printf(&buffer, " empty"); -+ -+ TRACE("%s\n", buffer.buffer); -+ -+ vkd3d_string_buffer_cleanup(&buffer); -+} -+ - void vsir_program_trace(const struct vsir_program *program) - { - const unsigned int flags = VSIR_ASM_FLAG_DUMP_TYPES | VSIR_ASM_FLAG_DUMP_ALL_INDICES; -@@ -2556,6 +2583,7 @@ void vsir_program_trace(const struct vsir_program *program) - trace_signature(&program->input_signature, "Input"); - trace_signature(&program->output_signature, "Output"); - trace_signature(&program->patch_constant_signature, "Patch-constant"); -+ trace_io_declarations(program); - - if (d3d_asm_compile(program, NULL, &code, flags) != VKD3D_OK) - return; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 7db658fb541..7ffd060d833 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -235,7 +235,7 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = - /* Arithmetic */ - {VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP}, - {VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV}, -- {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}}, -+ {VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}}, - {VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD}, - {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, - {VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD}, -@@ -248,22 +248,22 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = - {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, - {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, - {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, -- 
{VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, -+ {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, - {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, - {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, - {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, - {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, - {VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT}, - {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, -- {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, -+ {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP, {2, 0}}, - {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, -- {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, -- {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, -- {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}}, -- {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,}, -- {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, -- {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, -+ {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, -+ {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, -+ {VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, {2, 1}}, -+ {VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}}, -+ {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, -+ {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, -+ {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, - /* Matrix */ - {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, - {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, -@@ -274,27 +274,27 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = - {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, - /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, -- {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 0}}, - /* Control flow */ -- {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_IFC, 0, 2, 
VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, -- {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}}, -- -- {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, -- {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, -+ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}}, -+ {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}}, -+ {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}}, -+ {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, -+ {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}}, -+ {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}}, -+ {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, -+ {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, -+ {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, -+ {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}}, -+ {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}}, -+ {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}}, -+ {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}}, -+ {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}}, -+ {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}}, -+ -+ {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, -+ {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, - {0, 0, 0, VKD3DSIH_INVALID}, - }; - -@@ -307,89 +307,84 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = - {VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB}, - {VKD3D_SM1_OP_MAD, 1, 3, 
VKD3DSIH_MAD}, - {VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL}, -- {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP}, -- {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ}, -+ {VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP, {2, 0}}, -+ {VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ, {2, 0}}, - {VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3}, -- {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4}, -- {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN}, -- {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX}, -- {VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT}, -- {VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE}, -- {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS}, -- {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP}, -- {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG}, -- {VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP}, -- {VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP}, -- {VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST}, -+ {VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4, {1, 2}}, -+ {VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN, {2, 0}}, -+ {VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX, {2, 0}}, -+ {VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS, {2, 0}}, -+ {VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP, {2, 0}}, -+ {VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG, {2, 0}}, - {VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP}, -- {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC}, -- {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}}, -- {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}}, -- {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW}, -- {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS}, -- {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM}, -- {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}}, -- {VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}}, -+ {VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC, {2, 0}}, -+ {VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, {1, 4}}, -+ {VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}}, -+ {VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW, {2, 0}}, -+ {VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS, {2, 0}}, -+ {VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM, {2, 0}}, -+ {VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, {2, 1}}, -+ 
{VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}}, -+ {VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}}, - /* Matrix */ -- {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4}, -- {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3}, -- {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4}, -- {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3}, -- {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2}, -+ {VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4, {2, 0}}, -+ {VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3, {2, 0}}, -+ {VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4, {2, 0}}, -+ {VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3, {2, 0}}, -+ {VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2, {2, 0}}, - /* Declarations */ -- {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL}, -+ {VKD3D_SM1_OP_DCL, 0, 0, VKD3DSIH_DCL, {2, 0}}, - /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, -- {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, -- {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, -+ {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB, {2, 0}}, -+ {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI, {2, 1}}, - /* Control flow */ -- {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP}, -- {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}}, -+ 
{VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}}, -+ {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}}, -+ {VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}}, -+ {VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}}, -+ {VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}}, -+ {VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}}, -+ {VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}}, -+ {VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}}, -+ {VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP, {2, 1}}, -+ {VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}}, -+ {VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}}, -+ {VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}}, -+ {VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}}, -+ {VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}}, -+ {VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}}, - /* Texture */ -- {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}}, -- {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}}, -- {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}}, -- {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}}, -- {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, 
VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}}, -- {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}}, -- {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}}, -- {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP}, -- {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}}, -- {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE}, -+ {VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1, 4}, {1, 4}}, -+ {VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1, 0}}, -+ {VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, {1, 4}}, -+ {VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}}, -+ {VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, {0, 0}}, -+ {VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x3VSPEC, 
1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, {1, 3}}, -+ {VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, {1, 4}}, -+ {VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, {1, 4}}, -+ {VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}}, -+ {VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}}, -+ {VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}}, -+ {VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP, {2, 1}}, -+ {VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}}, -+ {VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE, {1, 4}, {1, 4}}, - {0, 0, 0, VKD3DSIH_INVALID}, - }; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 0df0e30f399..113c7eee65f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -1507,13 +1507,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_DCL_INDEXABLE_TEMP: - shader_glsl_dcl_indexable_temp(gen, ins); - break; -- case VKD3DSIH_DCL_INPUT: -- case VKD3DSIH_DCL_INPUT_PS: -- case VKD3DSIH_DCL_INPUT_PS_SGV: -- case VKD3DSIH_DCL_INPUT_PS_SIV: -- case VKD3DSIH_DCL_INPUT_SGV: -- case VKD3DSIH_DCL_OUTPUT: -- case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; - case VKD3DSIH_DEFAULT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 97c6c0a1377..f0d24b835e5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2700,10 +2700,8 @@ struct hlsl_ir_function_decl *hlsl_get_func_decl(struct hlsl_ctx *ctx, const cha - return NULL; - } - --struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const 
struct hlsl_type *type) -+static void hlsl_dump_type(struct vkd3d_string_buffer *buffer, const struct hlsl_type *type) - { -- struct vkd3d_string_buffer *string, *inner_string; -- - static const char *const base_types[] = - { - [HLSL_TYPE_FLOAT] = "float", -@@ -2727,31 +2725,28 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - [HLSL_SAMPLER_DIM_CUBEARRAY] = "CubeArray", - }; - -- if (!(string = hlsl_get_string_buffer(ctx))) -- return NULL; -- - if (type->name) - { -- vkd3d_string_buffer_printf(string, "%s", type->name); -- return string; -+ vkd3d_string_buffer_printf(buffer, "%s", type->name); -+ return; - } - - switch (type->class) - { - case HLSL_CLASS_SCALAR: - VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); -- return string; -+ vkd3d_string_buffer_printf(buffer, "%s", base_types[type->e.numeric.type]); -+ return; - - case HLSL_CLASS_VECTOR: - VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); -- return string; -+ vkd3d_string_buffer_printf(buffer, "%s%u", base_types[type->e.numeric.type], type->dimx); -+ return; - - case HLSL_CLASS_MATRIX: - VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); -- vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); -- return string; -+ vkd3d_string_buffer_printf(buffer, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); -+ return; - - case HLSL_CLASS_ARRAY: - { -@@ -2760,102 +2755,85 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) - ; - -- if ((inner_string = hlsl_type_to_string(ctx, t))) -- { -- vkd3d_string_buffer_printf(string, "%s", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -- } -- -+ 
hlsl_dump_type(buffer, t); - for (t = type; t->class == HLSL_CLASS_ARRAY; t = t->e.array.type) - { - if (t->e.array.elements_count == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -- vkd3d_string_buffer_printf(string, "[]"); -+ vkd3d_string_buffer_printf(buffer, "[]"); - else -- vkd3d_string_buffer_printf(string, "[%u]", t->e.array.elements_count); -+ vkd3d_string_buffer_printf(buffer, "[%u]", t->e.array.elements_count); - } -- return string; -+ return; - } - - case HLSL_CLASS_STRUCT: -- vkd3d_string_buffer_printf(string, ""); -- return string; -+ vkd3d_string_buffer_printf(buffer, ""); -+ return; - - case HLSL_CLASS_TEXTURE: - if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) - { -- vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); -- return string; -+ vkd3d_string_buffer_printf(buffer, "ByteAddressBuffer"); -+ return; - } - - if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { -- vkd3d_string_buffer_printf(string, "Texture"); -- return string; -+ vkd3d_string_buffer_printf(buffer, "Texture"); -+ return; - } - - VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); - VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - { -- vkd3d_string_buffer_printf(string, "Buffer"); -+ vkd3d_string_buffer_printf(buffer, "Buffer<"); - } - else - { - VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); -- vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); -- } -- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -- { -- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -+ vkd3d_string_buffer_printf(buffer, "Texture%s<", dimensions[type->sampler_dim]); - } -- return string; -+ hlsl_dump_type(buffer, type->e.resource.format); -+ vkd3d_string_buffer_printf(buffer, ">"); -+ return; - - case HLSL_CLASS_UAV: - if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) - { -- 
vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); -- return string; -+ vkd3d_string_buffer_printf(buffer, "RWByteAddressBuffer"); -+ return; - } - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) -- vkd3d_string_buffer_printf(string, "RWBuffer"); -+ vkd3d_string_buffer_printf(buffer, "RWBuffer<"); - else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -- vkd3d_string_buffer_printf(string, "RWStructuredBuffer"); -+ vkd3d_string_buffer_printf(buffer, "RWStructuredBuffer<"); - else -- vkd3d_string_buffer_printf(string, "RWTexture%s", dimensions[type->sampler_dim]); -- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -- { -- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -- } -- return string; -+ vkd3d_string_buffer_printf(buffer, "RWTexture%s<", dimensions[type->sampler_dim]); -+ hlsl_dump_type(buffer, type->e.resource.format); -+ vkd3d_string_buffer_printf(buffer, ">"); -+ return; - - case HLSL_CLASS_CONSTANT_BUFFER: -- vkd3d_string_buffer_printf(string, "ConstantBuffer"); -- if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -- { -- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -- } -- return string; -+ vkd3d_string_buffer_printf(buffer, "ConstantBuffer<"); -+ hlsl_dump_type(buffer, type->e.resource.format); -+ vkd3d_string_buffer_printf(buffer, ">"); -+ return; - - case HLSL_CLASS_ERROR: -- vkd3d_string_buffer_printf(string, ""); -- return string; -+ vkd3d_string_buffer_printf(buffer, ""); -+ return; - - case HLSL_CLASS_STREAM_OUTPUT: - if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) -- vkd3d_string_buffer_printf(string, "PointStream"); -+ vkd3d_string_buffer_printf(buffer, "PointStream<"); - else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) -- vkd3d_string_buffer_printf(string, "LineStream"); -+ vkd3d_string_buffer_printf(buffer, 
"LineStream<"); - else -- vkd3d_string_buffer_printf(string, "TriangleStream"); -- if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type))) -- { -- vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -- hlsl_release_string_buffer(ctx, inner_string); -- } -- return string; -+ vkd3d_string_buffer_printf(buffer, "TriangleStream<"); -+ hlsl_dump_type(buffer, type->e.so.type); -+ vkd3d_string_buffer_printf(buffer, ">"); -+ return; - - case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: -@@ -2878,8 +2856,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - break; - } - -- vkd3d_string_buffer_printf(string, ""); -- return string; -+ vkd3d_string_buffer_printf(buffer, ""); -+} -+ -+struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const struct hlsl_type *type) -+{ -+ struct vkd3d_string_buffer *buffer; -+ -+ if (!(buffer = hlsl_get_string_buffer(ctx))) -+ return NULL; -+ hlsl_dump_type(buffer, type); -+ return buffer; - } - - struct vkd3d_string_buffer *hlsl_component_to_string(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var, -@@ -3042,7 +3029,8 @@ static void dump_ir_var(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer - vkd3d_string_buffer_printf(buffer, "%s ", string->buffer); - hlsl_release_string_buffer(ctx, string); - } -- vkd3d_string_buffer_printf(buffer, "%s %s", debug_hlsl_type(ctx, var->data_type), var->name); -+ hlsl_dump_type(buffer, var->data_type); -+ vkd3d_string_buffer_printf(buffer, " %s", var->name); - if (var->semantic.name) - vkd3d_string_buffer_printf(buffer, " : %s%u", var->semantic.name, var->semantic.index); - } -@@ -3123,34 +3111,28 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) - return vkd3d_dbg_sprintf(".%s", string); - } - --static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) -+void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, -+ struct 
vkd3d_string_buffer *buffer, const struct hlsl_ir_function_decl *f) - { -- const struct hlsl_ir_function_decl *decl = call->decl; -- struct vkd3d_string_buffer *string; - size_t i; - -- if (!(string = hlsl_type_to_string(ctx, decl->return_type))) -- return; -- -- vkd3d_string_buffer_printf(buffer, "call %s %s(", string->buffer, decl->func->name); -- hlsl_release_string_buffer(ctx, string); -- -- for (i = 0; i < decl->parameters.count; ++i) -+ hlsl_dump_type(buffer, f->return_type); -+ vkd3d_string_buffer_printf(buffer, " %s(", f->func->name); -+ for (i = 0; i < f->parameters.count; ++i) - { -- const struct hlsl_ir_var *param = decl->parameters.vars[i]; -- -- if (!(string = hlsl_type_to_string(ctx, param->data_type))) -- return; -- - if (i) - vkd3d_string_buffer_printf(buffer, ", "); -- vkd3d_string_buffer_printf(buffer, "%s", string->buffer); -- -- hlsl_release_string_buffer(ctx, string); -+ dump_ir_var(ctx, buffer, f->parameters.vars[i]); - } - vkd3d_string_buffer_printf(buffer, ")"); - } - -+static void dump_ir_call(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, const struct hlsl_ir_call *call) -+{ -+ vkd3d_string_buffer_printf(buffer, "call "); -+ hlsl_dump_ir_function_decl(ctx, buffer, call->decl); -+} -+ - static void dump_ir_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_constant *constant) - { - struct hlsl_type *type = constant->node.data_type; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 25d1b8df947..addc98d5a43 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -1437,6 +1437,8 @@ void hlsl_block_cleanup(struct hlsl_block *block); - bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -+void hlsl_dump_ir_function_decl(struct hlsl_ctx *ctx, -+ struct vkd3d_string_buffer *buffer, const 
struct hlsl_ir_function_decl *f); - void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); - - bool hlsl_state_block_add_entry(struct hlsl_state_block *state_block, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 5bcd5e9034b..afa41f4b1c2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -475,7 +475,11 @@ static bool add_explicit_conversion(struct hlsl_ctx *ctx, struct hlsl_block *blo - for (i = 0; i < arrays->count; ++i) - { - if (arrays->sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) -+ { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Implicit size arrays not allowed in casts."); -+ dst_type = ctx->builtin_types.error; -+ break; -+ } - dst_type = hlsl_new_array_type(ctx, dst_type, arrays->sizes[i]); - } - -@@ -1190,6 +1194,8 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays not allowed in struct fields."); -+ field->type = ctx->builtin_types.error; -+ break; - } - - field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); -@@ -1280,6 +1286,12 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays not allowed in typedefs."); -+ if (!(type = hlsl_type_clone(ctx, ctx->builtin_types.error, 0, 0))) -+ { -+ free_parse_variable_def(v); -+ ret = false; -+ } -+ break; - } - - if (!(type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]))) -@@ -2668,26 +2680,30 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Only innermost array size can be implicit."); -- v->initializer.args_count = 0; -+ type = ctx->builtin_types.error; -+ break; - } - else if (elem_components == 0) - { - hlsl_error(ctx, 
&v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Cannot declare an implicit size array of a size 0 type."); -- v->initializer.args_count = 0; -+ type = ctx->builtin_types.error; -+ break; - } - else if (size == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays need to be initialized."); -- v->initializer.args_count = 0; -+ type = ctx->builtin_types.error; -+ break; - } - else if (size % elem_components != 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Cannot initialize implicit size array with %u components, expected a multiple of %u.", - size, elem_components); -- v->initializer.args_count = 0; -+ type = ctx->builtin_types.error; -+ break; - } - else - { -@@ -2906,7 +2922,8 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - v->initializer.args[0] = node_from_block(v->initializer.instrs); - } - -- initialize_var(ctx, var, &v->initializer, is_default_values_initializer); -+ if (var->data_type->class != HLSL_CLASS_ERROR) -+ initialize_var(ctx, var, &v->initializer, is_default_values_initializer); - - if (is_default_values_initializer) - { -@@ -2995,9 +3012,16 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - const char *name, const struct parse_initializer *args, bool is_compile, - const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_function_decl *decl, *compatible_match = NULL; -+ struct hlsl_ir_function_decl *decl; -+ struct vkd3d_string_buffer *s; - struct hlsl_ir_function *func; - struct rb_entry *entry; -+ size_t i; -+ struct -+ { -+ struct hlsl_ir_function_decl **candidates; -+ size_t count, capacity; -+ } candidates = {0}; - - if (!(entry = rb_get(&ctx->functions, name))) - return NULL; -@@ -3005,18 +3029,41 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, - - LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) - { -- if 
(func_is_compatible_match(ctx, decl, is_compile, args)) -+ if (!func_is_compatible_match(ctx, decl, is_compile, args)) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&candidates.candidates, -+ &candidates.capacity, candidates.count + 1, sizeof(decl)))) -+ { -+ vkd3d_free(candidates.candidates); -+ return NULL; -+ } -+ candidates.candidates[candidates.count++] = decl; -+ } -+ -+ if (!candidates.count) -+ return NULL; -+ -+ if (candidates.count > 1) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL, "Ambiguous function call."); -+ if ((s = hlsl_get_string_buffer(ctx))) - { -- if (compatible_match) -+ hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, "Candidates are:"); -+ for (i = 0; i < candidates.count; ++i) - { -- hlsl_fixme(ctx, loc, "Prioritize between multiple compatible function overloads."); -- break; -+ hlsl_dump_ir_function_decl(ctx, s, candidates.candidates[i]); -+ hlsl_note(ctx, loc, VKD3D_SHADER_LOG_ERROR, " %s;", s->buffer); -+ vkd3d_string_buffer_clear(s); - } -- compatible_match = decl; -+ hlsl_release_string_buffer(ctx, s); - } - } - -- return compatible_match; -+ decl = candidates.candidates[0]; -+ vkd3d_free(candidates.candidates); -+ -+ return decl; - } - - static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -@@ -5445,6 +5492,17 @@ static struct hlsl_block *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - -+ if (!hlsl_is_numeric_type(type)) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Constructor data type %s is not numeric.", string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ return NULL; -+ } -+ - if (!(var = hlsl_new_synthetic_var(ctx, "constructor", type, loc))) - return NULL; - -@@ -7688,7 +7746,10 @@ parameter_decl: - { - hlsl_error(ctx, &@3, 
VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays not allowed in function parameters."); -+ type = ctx->builtin_types.error; -+ break; - } -+ - type = hlsl_new_array_type(ctx, type, $4.sizes[i]); - } - vkd3d_free($4.sizes); -@@ -8110,14 +8171,9 @@ typedef: - } - - if (modifiers) -- { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Storage modifiers are not allowed on typedefs."); -- LIST_FOR_EACH_ENTRY_SAFE(v, v_next, $4, struct parse_variable_def, entry) -- vkd3d_free(v); -- vkd3d_free($4); -- YYABORT; -- } -+ - if (!add_typedef(ctx, type, $4)) - YYABORT; - } -@@ -9001,17 +9057,24 @@ primary_expr: - struct hlsl_ir_load *load; - struct hlsl_ir_var *var; - -- if (!(var = hlsl_get_var(ctx->cur_scope, $1))) -+ if ((var = hlsl_get_var(ctx->cur_scope, $1))) -+ { -+ vkd3d_free($1); -+ -+ if (!(load = hlsl_new_var_load(ctx, var, &@1))) -+ YYABORT; -+ if (!($$ = make_block(ctx, &load->node))) -+ YYABORT; -+ } -+ else - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Variable \"%s\" is not defined.", $1); - vkd3d_free($1); -- YYABORT; -+ -+ if (!($$ = make_empty_block(ctx))) -+ YYABORT; -+ $$->value = ctx->error_instr; - } -- vkd3d_free($1); -- if (!(load = hlsl_new_var_load(ctx, var, &@1))) -- YYABORT; -- if (!($$ = make_block(ctx, &load->node))) -- YYABORT; - } - | '(' expr ')' - { -@@ -9171,23 +9234,8 @@ postfix_expr: - | var_modifiers type '(' initializer_expr_list ')' - { - if ($1) -- { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on constructors."); -- free_parse_initializer(&$4); -- YYABORT; -- } -- if (!hlsl_is_numeric_type($2)) -- { -- struct vkd3d_string_buffer *string; -- -- if ((string = hlsl_type_to_string(ctx, $2))) -- hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Constructor data type %s is not numeric.", string->buffer); -- hlsl_release_string_buffer(ctx, string); -- free_parse_initializer(&$4); -- YYABORT; -- } - - if (!($$ = add_constructor(ctx, 
$2, &$4, &@2))) - { -@@ -9255,11 +9303,8 @@ unary_expr: - | '(' var_modifiers type arrays ')' unary_expr - { - if ($2) -- { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on casts."); -- YYABORT; -- } - - if (!add_explicit_conversion(ctx, $6, $3, &$4, &@3)) - { -@@ -9403,10 +9448,7 @@ assignment_expr: - struct hlsl_ir_node *lhs = node_from_block($1), *rhs = node_from_block($3); - - if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) -- { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Statement modifies a const expression."); -- YYABORT; -- } - hlsl_block_add_block($3, $1); - destroy_block($1); - if (!add_assignment(ctx, $3, lhs, $2, rhs)) -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 53b26dac76e..64c9585af52 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -750,6 +750,76 @@ static enum vkd3d_result vsir_program_lower_texldd(struct vsir_program *program, - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_program_lower_dcl_input(struct vsir_program *program, -+ struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) -+{ -+ switch (ins->declaration.dst.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_OUTPUT: -+ case VKD3DSPR_PATCHCONST: -+ case VKD3DSPR_INCONTROLPOINT: -+ case VKD3DSPR_OUTCONTROLPOINT: -+ break; -+ -+ case VKD3DSPR_PRIMID: -+ case VKD3DSPR_FORKINSTID: -+ case VKD3DSPR_JOININSTID: -+ case VKD3DSPR_THREADID: -+ case VKD3DSPR_THREADGROUPID: -+ case VKD3DSPR_LOCALTHREADID: -+ case VKD3DSPR_LOCALTHREADINDEX: -+ case VKD3DSPR_COVERAGE: -+ case VKD3DSPR_TESSCOORD: -+ case VKD3DSPR_OUTPOINTID: -+ case VKD3DSPR_GSINSTID: -+ case VKD3DSPR_WAVELANECOUNT: -+ case VKD3DSPR_WAVELANEINDEX: -+ bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); -+ break; -+ -+ default: -+ vkd3d_shader_error(ctx->message_context, &ins->location, -+ 
VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Internal compiler error: invalid register type %#x for DCL_INPUT.", -+ ins->declaration.dst.reg.type); -+ return VKD3D_ERROR; -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_lower_dcl_output(struct vsir_program *program, -+ struct vkd3d_shader_instruction *ins, struct vsir_transformation_context *ctx) -+{ -+ switch (ins->declaration.dst.reg.type) -+ { -+ case VKD3DSPR_INPUT: -+ case VKD3DSPR_OUTPUT: -+ case VKD3DSPR_PATCHCONST: -+ case VKD3DSPR_INCONTROLPOINT: -+ case VKD3DSPR_OUTCONTROLPOINT: -+ break; -+ -+ case VKD3DSPR_DEPTHOUT: -+ case VKD3DSPR_SAMPLEMASK: -+ case VKD3DSPR_DEPTHOUTGE: -+ case VKD3DSPR_DEPTHOUTLE: -+ case VKD3DSPR_OUTSTENCILREF: -+ bitmap_set(program->io_dcls, ins->declaration.dst.reg.type); -+ break; -+ -+ default: -+ vkd3d_shader_error(ctx->message_context, &ins->location, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Internal compiler error: invalid register type %#x for DCL_OUTPUT.", -+ ins->declaration.dst.reg.type); -+ return VKD3D_ERROR; -+ } -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, - struct vsir_transformation_context *ctx) - { -@@ -790,6 +860,25 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - vkd3d_shader_instruction_make_nop(ins); - break; - -+ case VKD3DSIH_DCL_INPUT: -+ vsir_program_lower_dcl_input(program, ins, ctx); -+ vkd3d_shader_instruction_make_nop(ins); -+ break; -+ -+ case VKD3DSIH_DCL_OUTPUT: -+ vsir_program_lower_dcl_output(program, ins, ctx); -+ vkd3d_shader_instruction_make_nop(ins); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_SGV: -+ case VKD3DSIH_DCL_INPUT_SIV: -+ case VKD3DSIH_DCL_INPUT_PS: -+ case VKD3DSIH_DCL_INPUT_PS_SGV: -+ case VKD3DSIH_DCL_INPUT_PS_SIV: -+ case VKD3DSIH_DCL_OUTPUT_SIV: -+ vkd3d_shader_instruction_make_nop(ins); -+ break; -+ - case VKD3DSIH_SINCOS: - if ((ret = vsir_program_lower_sm1_sincos(program, 
ins)) < 0) - return ret; -@@ -1271,12 +1360,6 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( -- &ins->declaration.dst.reg)) -- { -- vkd3d_shader_instruction_make_nop(ins); -- return; -- } - - if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins)) - return; -@@ -1433,11 +1516,10 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p - { - struct vkd3d_shader_instruction *ins; - const struct signature_element *e; -- unsigned int i, count, stride = 0; -+ unsigned int i, count = 2; - - for (i = 0; i < s->element_count; ++i) -- stride += !!s->elements[i].used_mask; -- count = 2 + 3 * stride; -+ count += !!s->elements[i].used_mask; - - if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) - return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1449,78 +1531,43 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p - ins = &normaliser->instructions.elements[dst]; - vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); - -- ins = &normaliser->instructions.elements[dst + 1 + 3 * stride]; -- vsir_instruction_init(ins, location, VKD3DSIH_RET); -- -- ins = &normaliser->instructions.elements[dst + 1]; -+ ++ins; - - for (i = 0; i < s->element_count; ++i) - { -- struct vkd3d_shader_instruction *ins_in, *ins_out, *ins_mov; -- struct vkd3d_shader_dst_param *param_in, *param_out; -- - e = &s->elements[i]; - if (!e->used_mask) - continue; - -- ins_in = ins; -- ins_out = &ins[stride]; -- ins_mov = &ins[2 * stride]; -- -- if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) -- { -- vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT_SIV); -- param_in = &ins_in->declaration.register_semantic.reg; -- ins_in->declaration.register_semantic.sysval_semantic = 
vkd3d_siv_from_sysval(e->sysval_semantic); -- -- vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT_SIV); -- param_out = &ins_out->declaration.register_semantic.reg; -- ins_out->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); -- } -- else -- { -- vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT); -- param_in = &ins_in->declaration.dst; -- -- vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT); -- param_out = &ins_out->declaration.dst; -- } -- -- vsir_dst_param_init_io(param_in, VKD3DSPR_INPUT, e, 2); -- param_in->reg.idx[0].offset = input_control_point_count; -- param_in->reg.idx[1].offset = e->register_index; -- param_in->write_mask = e->mask; -- -- vsir_dst_param_init_io(param_out, VKD3DSPR_OUTPUT, e, 2); -- param_out->reg.idx[0].offset = input_control_point_count; -- param_out->reg.idx[1].offset = e->register_index; -- param_out->write_mask = e->mask; -- -- vsir_instruction_init(ins_mov, location, VKD3DSIH_MOV); -- ins_mov->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); -- ins_mov->dst_count = 1; -- ins_mov->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); -- ins_mov->src_count = 1; -+ vsir_instruction_init(ins, location, VKD3DSIH_MOV); -+ ins->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); -+ ins->dst_count = 1; -+ ins->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); -+ ins->src_count = 1; - -- if (!ins_mov->dst || ! ins_mov->src) -+ if (!ins->dst || ! 
ins->src) - { - WARN("Failed to allocate dst/src param.\n"); - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -- vsir_dst_param_init_io(&ins_mov->dst[0], VKD3DSPR_OUTPUT, e, 2); -- ins_mov->dst[0].reg.idx[0].offset = 0; -- ins_mov->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; -- ins_mov->dst[0].reg.idx[1].offset = e->register_index; -+ vsir_dst_param_init_io(&ins->dst[0], VKD3DSPR_OUTPUT, e, 2); -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].reg.idx[0].offset = 0; -+ ins->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; -+ ins->dst[0].reg.idx[1].offset = e->register_index; - -- vsir_src_param_init_io(&ins_mov->src[0], VKD3DSPR_INPUT, e, 2); -- ins_mov->src[0].reg.idx[0].offset = 0; -- ins_mov->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; -- ins_mov->src[0].reg.idx[1].offset = e->register_index; -+ vsir_src_param_init_io(&ins->src[0], VKD3DSPR_INPUT, e, 2); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].reg.idx[0].offset = 0; -+ ins->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; -+ ins->src[0].reg.idx[1].offset = e->register_index; - - ++ins; - } - -+ vsir_instruction_init(ins, location, VKD3DSIH_RET); -+ - return VKD3D_OK; - } - -@@ -2042,12 +2089,11 @@ static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_sh - return id_idx; - } - --static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl, -+static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, - struct io_normaliser *normaliser) - { - unsigned int id_idx, reg_idx, write_mask, element_idx; - struct vkd3d_shader_register *reg = &dst_param->reg; -- struct vkd3d_shader_dst_param **dcl_params; - const struct shader_signature *signature; - const struct signature_element *e; - -@@ -2063,26 +2109,22 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - /* Convert patch constant outputs to the patch constant 
register type to avoid the need - * to convert compiler symbols when accessed as inputs in a later stage. */ - reg->type = VKD3DSPR_PATCHCONST; -- dcl_params = normaliser->pc_dcl_params; - } - else - { - signature = normaliser->output_signature; -- dcl_params = normaliser->output_dcl_params; - } - break; - - case VKD3DSPR_PATCHCONST: - reg_idx = reg->idx[reg->idx_count - 1].offset; - signature = normaliser->patch_constant_signature; -- dcl_params = normaliser->pc_dcl_params; - break; - - case VKD3DSPR_COLOROUT: - reg_idx = reg->idx[0].offset; - signature = normaliser->output_signature; - reg->type = VKD3DSPR_OUTPUT; -- dcl_params = normaliser->output_dcl_params; - break; - - case VKD3DSPR_INCONTROLPOINT: -@@ -2090,14 +2132,12 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - reg_idx = reg->idx[reg->idx_count - 1].offset; - signature = normaliser->input_signature; - reg->type = VKD3DSPR_INPUT; -- dcl_params = normaliser->input_dcl_params; - break; - - case VKD3DSPR_ATTROUT: - reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset; - signature = normaliser->output_signature; - reg->type = VKD3DSPR_OUTPUT; -- dcl_params = normaliser->output_dcl_params; - break; - - case VKD3DSPR_RASTOUT: -@@ -2107,7 +2147,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; - signature = normaliser->output_signature; - reg->type = VKD3DSPR_OUTPUT; -- dcl_params = normaliser->output_dcl_params; - /* Fog and point size are scalar, but fxc/d3dcompiler emits a full - * write mask when writing to them. */ - if (reg->idx[0].offset > 0) -@@ -2123,54 +2162,15 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - vkd3d_unreachable(); - e = &signature->elements[element_idx]; - -- if (is_io_dcl) -- { -- /* Validated in the TPF reader. 
*/ -- VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); -- -- if (dcl_params[element_idx]) -- { -- /* Merge split declarations into a single one. */ -- dcl_params[element_idx]->write_mask |= dst_param->write_mask; -- /* Turn this into a nop. */ -- return false; -- } -- else -- { -- dcl_params[element_idx] = dst_param; -- } -- } -- - if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) - { -- if (is_io_dcl) -- { -- /* Emit an array size for the control points for consistency with inputs. */ -- reg->idx[0].offset = normaliser->output_control_point_count; -- } -- else -- { -- /* The control point id param. */ -- VKD3D_ASSERT(reg->idx[0].rel_addr); -- } -+ /* The control point id param. */ -+ VKD3D_ASSERT(reg->idx[0].rel_addr); - id_idx = 1; - } - - if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) -- { -- if (is_io_dcl) -- { -- /* For control point I/O, idx 0 contains the control point count. -- * Ensure it is moved up to the next slot. 
*/ -- reg->idx[id_idx].offset = reg->idx[0].offset; -- reg->idx[0].offset = e->register_count; -- ++id_idx; -- } -- else -- { -- id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); -- } -- } -+ id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); - - /* Replace the register index with the signature element index */ - reg->idx[id_idx].offset = element_idx; -@@ -2264,37 +2264,10 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par - static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins, - struct io_normaliser *normaliser) - { -- struct vkd3d_shader_register *reg; - unsigned int i; - - switch (ins->opcode) - { -- case VKD3DSIH_DCL_INPUT: -- if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -- { -- reg = &ins->declaration.dst.reg; -- -- /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their -- * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ -- if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) -- vkd3d_shader_instruction_make_nop(ins); -- else if (reg->type == VKD3DSPR_INCONTROLPOINT) -- reg->type = VKD3DSPR_INPUT; -- } -- /* fall through */ -- case VKD3DSIH_DCL_INPUT_PS: -- case VKD3DSIH_DCL_OUTPUT: -- if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser)) -- vkd3d_shader_instruction_make_nop(ins); -- break; -- case VKD3DSIH_DCL_INPUT_SGV: -- case VKD3DSIH_DCL_INPUT_SIV: -- case VKD3DSIH_DCL_INPUT_PS_SGV: -- case VKD3DSIH_DCL_INPUT_PS_SIV: -- case VKD3DSIH_DCL_OUTPUT_SIV: -- if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser)) -- vkd3d_shader_instruction_make_nop(ins); -- break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -@@ -2307,7 +2280,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - if (vsir_instruction_is_dcl(ins)) - break; - for (i = 0; i < ins->dst_count; ++i) -- shader_dst_param_io_normalise(&ins->dst[i], false, normaliser); -+ shader_dst_param_io_normalise(&ins->dst[i], normaliser); - for (i = 0; i < ins->src_count; ++i) - shader_src_param_io_normalise(&ins->src[i], normaliser); - break; -@@ -9139,6 +9112,41 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); - vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); - -+ for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) -+ { -+ if (!bitmap_is_set(program->io_dcls, i)) -+ continue; -+ -+ switch (i) -+ { -+ /* Input registers */ -+ case VKD3DSPR_PRIMID: -+ case VKD3DSPR_FORKINSTID: -+ case VKD3DSPR_JOININSTID: -+ case VKD3DSPR_THREADID: -+ case VKD3DSPR_THREADGROUPID: -+ case VKD3DSPR_LOCALTHREADID: -+ case VKD3DSPR_LOCALTHREADINDEX: -+ case VKD3DSPR_COVERAGE: -+ case VKD3DSPR_TESSCOORD: -+ 
case VKD3DSPR_OUTPOINTID: -+ case VKD3DSPR_GSINSTID: -+ case VKD3DSPR_WAVELANECOUNT: -+ case VKD3DSPR_WAVELANEINDEX: -+ /* Output registers */ -+ case VKD3DSPR_DEPTHOUT: -+ case VKD3DSPR_SAMPLEMASK: -+ case VKD3DSPR_DEPTHOUTGE: -+ case VKD3DSPR_DEPTHOUTLE: -+ case VKD3DSPR_OUTSTENCILREF: -+ break; -+ -+ default: -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid input/output declaration %u.", i); -+ } -+ } -+ - if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) - goto fail; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 9a3c3ed885e..0406b8fbd51 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -398,6 +398,28 @@ static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instr - msl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, uint32_t src_mask) -+{ -+ unsigned int component_count; -+ struct msl_src src[2]; -+ struct msl_dst dst; -+ uint32_t dst_mask; -+ -+ dst_mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src[0], gen, &ins->src[0], src_mask); -+ msl_src_init(&src[1], gen, &ins->src[1], src_mask); -+ -+ if ((component_count = vsir_write_mask_component_count(dst_mask)) > 1) -+ msl_print_assignment(gen, &dst, "float%u(dot(%s, %s))", -+ component_count, src[0].str->buffer, src[1].str->buffer); -+ else -+ msl_print_assignment(gen, &dst, "dot(%s, %s)", src[0].str->buffer, src[1].str->buffer); -+ -+ msl_src_cleanup(&src[1], &gen->string_buffers); -+ msl_src_cleanup(&src[0], &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ - static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) - { - struct msl_src src; -@@ -513,14 +535,26 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - 
case VKD3DSIH_AND: - msl_binop(gen, ins, "&"); - break; -- case VKD3DSIH_DCL_INPUT: -- case VKD3DSIH_DCL_OUTPUT: -- case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; - case VKD3DSIH_DIV: - msl_binop(gen, ins, "/"); - break; -+ case VKD3DSIH_DP2: -+ msl_dot(gen, ins, vkd3d_write_mask_from_component_count(2)); -+ break; -+ case VKD3DSIH_DP3: -+ msl_dot(gen, ins, vkd3d_write_mask_from_component_count(3)); -+ break; -+ case VKD3DSIH_DP4: -+ msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); -+ break; -+ case VKD3DSIH_IEQ: -+ msl_relop(gen, ins, "=="); -+ break; -+ case VKD3DSIH_EXP: -+ msl_intrinsic(gen, ins, "exp2"); -+ break; - case VKD3DSIH_FRC: - msl_intrinsic(gen, ins, "fract"); - break; -@@ -533,6 +567,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_GEO: - msl_relop(gen, ins, ">="); - break; -+ case VKD3DSIH_LTO: -+ msl_relop(gen, ins, "<"); -+ break; - case VKD3DSIH_INE: - case VKD3DSIH_NEU: - msl_relop(gen, ins, "!="); -@@ -562,6 +599,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_ROUND_Z: - msl_intrinsic(gen, ins, "trunc"); - break; -+ case VKD3DSIH_SQRT: -+ msl_intrinsic(gen, ins, "sqrt"); -+ break; - default: - msl_unhandled(gen, ins); - break; -@@ -737,13 +777,6 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) - continue; - } - -- if (e->interpolation_mode != VKD3DSIM_NONE) -- { -- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -- "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -- continue; -- } -- - if(e->register_count > 1) - { - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -@@ -787,6 +820,18 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) - break; - } - -+ switch (e->interpolation_mode) -+ { -+ /* The default interpolation attribute. 
*/ -+ case VKD3DSIM_LINEAR: -+ case VKD3DSIM_NONE: -+ break; -+ default: -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled interpolation mode %#x.", e->interpolation_mode); -+ break; -+ } -+ - vkd3d_string_buffer_printf(buffer, ";\n"); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 649f92a57f3..0b14f50a312 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -4851,35 +4851,36 @@ static const struct vkd3d_spirv_builtin vkd3d_output_point_size_builtin = - static const struct - { - enum vkd3d_shader_register_type reg_type; -+ SpvStorageClass storage_class; - struct vkd3d_spirv_builtin builtin; - } - vkd3d_register_builtins[] = - { -- {VKD3DSPR_THREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, -- {VKD3DSPR_LOCALTHREADID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, -- {VKD3DSPR_LOCALTHREADINDEX, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, -- {VKD3DSPR_THREADGROUPID, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, -+ {VKD3DSPR_THREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInGlobalInvocationId}}, -+ {VKD3DSPR_LOCALTHREADID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInLocalInvocationId}}, -+ {VKD3DSPR_LOCALTHREADINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInLocalInvocationIndex}}, -+ {VKD3DSPR_THREADGROUPID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 3, SpvBuiltInWorkgroupId}}, - -- {VKD3DSPR_GSINSTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, -- {VKD3DSPR_OUTPOINTID, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, -+ {VKD3DSPR_GSINSTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, -+ {VKD3DSPR_OUTPOINTID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInInvocationId}}, - -- {VKD3DSPR_PRIMID, 
{VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, -+ {VKD3DSPR_PRIMID, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_INT, 1, SpvBuiltInPrimitiveId}}, - -- {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, -+ {VKD3DSPR_TESSCOORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, - -- {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, -+ {VKD3DSPR_POINT_COORD, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, - -- {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, -- {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, -+ {VKD3DSPR_COVERAGE, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, -+ {VKD3DSPR_SAMPLEMASK, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - -- {VKD3DSPR_DEPTHOUT, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, -- {VKD3DSPR_DEPTHOUTGE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, -- {VKD3DSPR_DEPTHOUTLE, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, -+ {VKD3DSPR_DEPTHOUT, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, -+ {VKD3DSPR_DEPTHOUTGE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, -+ {VKD3DSPR_DEPTHOUTLE, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_FLOAT, 1, SpvBuiltInFragDepth}}, - -- {VKD3DSPR_OUTSTENCILREF, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, -+ {VKD3DSPR_OUTSTENCILREF, SpvStorageClassOutput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInFragStencilRefEXT}}, - -- {VKD3DSPR_WAVELANECOUNT, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupSize}}, -- {VKD3DSPR_WAVELANEINDEX, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, -+ {VKD3DSPR_WAVELANECOUNT, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, 
SpvBuiltInSubgroupSize}}, -+ {VKD3DSPR_WAVELANEINDEX, SpvStorageClassInput, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSubgroupLocalInvocationId}}, - }; - - static void spirv_compiler_emit_register_execution_mode(struct spirv_compiler *compiler, -@@ -4938,14 +4939,18 @@ static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_sysval( - } - - static const struct vkd3d_spirv_builtin *get_spirv_builtin_for_register( -- enum vkd3d_shader_register_type reg_type) -+ enum vkd3d_shader_register_type reg_type, SpvStorageClass *storage_class) - { - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(vkd3d_register_builtins); ++i) - { - if (vkd3d_register_builtins[i].reg_type == reg_type) -+ { -+ if (storage_class) -+ *storage_class = vkd3d_register_builtins[i].storage_class; - return &vkd3d_register_builtins[i].builtin; -+ } - } - - return NULL; -@@ -4958,7 +4963,7 @@ static const struct vkd3d_spirv_builtin *vkd3d_get_spirv_builtin(const struct sp - - if ((builtin = get_spirv_builtin_for_sysval(compiler, sysval))) - return builtin; -- if ((builtin = get_spirv_builtin_for_register(reg_type))) -+ if ((builtin = get_spirv_builtin_for_register(reg_type, NULL))) - return builtin; - - if ((sysval != VKD3D_SHADER_SV_NONE && sysval != VKD3D_SHADER_SV_TARGET) -@@ -5290,21 +5295,26 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - return input_id; - } - --static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, -+static void spirv_compiler_emit_io_register(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_shader_register *reg = &dst->reg; - const struct vkd3d_spirv_builtin *builtin; - struct vkd3d_symbol reg_symbol; -+ SpvStorageClass storage_class; -+ uint32_t write_mask, id; - struct rb_entry *entry; -- uint32_t write_mask; -- uint32_t input_id; - - VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); - 
VKD3D_ASSERT(reg->idx_count < 2); - -- if (!(builtin = get_spirv_builtin_for_register(reg->type))) -+ if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -+ { -+ builtin = &vkd3d_output_point_size_builtin; -+ storage_class = SpvStorageClassOutput; -+ } -+ else if (!(builtin = get_spirv_builtin_for_register(reg->type, &storage_class))) - { - FIXME("Unhandled register %#x.\n", reg->type); - return; -@@ -5315,14 +5325,15 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, - if ((entry = rb_get(&compiler->symbol_table, ®_symbol))) - return; - -- input_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassInput, 0); -+ id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, 0); - - write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); -- vkd3d_symbol_set_register_info(®_symbol, input_id, -- SpvStorageClassInput, builtin->component_type, write_mask); -+ vkd3d_symbol_set_register_info(®_symbol, id, -+ storage_class, builtin->component_type, write_mask); - reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; - spirv_compiler_put_symbol(compiler, ®_symbol); -- spirv_compiler_emit_register_debug_name(builder, input_id, reg); -+ spirv_compiler_emit_register_execution_mode(compiler, reg->type); -+ spirv_compiler_emit_register_debug_name(builder, id, reg); - } - - static unsigned int get_shader_output_swizzle(const struct spirv_compiler *compiler, -@@ -5426,41 +5437,6 @@ static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler * - } - } - --static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, -- const struct vkd3d_shader_dst_param *dst) --{ -- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- const struct vkd3d_shader_register *reg = &dst->reg; -- const struct vkd3d_spirv_builtin *builtin; -- struct vkd3d_symbol reg_symbol; -- uint32_t write_mask; -- uint32_t output_id; -- -- 
VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); -- VKD3D_ASSERT(reg->idx_count < 2); -- -- if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) -- { -- builtin = &vkd3d_output_point_size_builtin; -- } -- else if (!(builtin = get_spirv_builtin_for_register(reg->type))) -- { -- FIXME("Unhandled register %#x.\n", reg->type); -- return; -- } -- -- output_id = spirv_compiler_emit_builtin_variable(compiler, builtin, SpvStorageClassOutput, 0); -- -- vkd3d_symbol_make_register(®_symbol, reg); -- write_mask = vkd3d_write_mask_from_component_count(builtin->component_count); -- vkd3d_symbol_set_register_info(®_symbol, output_id, -- SpvStorageClassOutput, builtin->component_type, write_mask); -- reg_symbol.info.reg.is_aggregate = builtin->spirv_array_size; -- spirv_compiler_put_symbol(compiler, ®_symbol); -- spirv_compiler_emit_register_execution_mode(compiler, reg->type); -- spirv_compiler_emit_register_debug_name(builder, output_id, reg); --} -- - static uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, - const struct vkd3d_spirv_builtin *builtin, const unsigned int *array_sizes, unsigned int size_count) - { -@@ -5825,16 +5801,6 @@ static void spirv_compiler_emit_shader_epilogue_function(struct spirv_compiler * - compiler->epilogue_function_id = 0; - } - --static void spirv_compiler_emit_hull_shader_builtins(struct spirv_compiler *compiler) --{ -- struct vkd3d_shader_dst_param dst; -- -- memset(&dst, 0, sizeof(dst)); -- vsir_register_init(&dst.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); -- dst.write_mask = VKD3DSP_WRITEMASK_0; -- spirv_compiler_emit_input_register(compiler, &dst); --} -- - static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *compiler) - { - const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->xfb_info; -@@ -5847,7 +5813,6 @@ static void spirv_compiler_emit_initial_declarations(struct spirv_compiler *comp - break; - case 
VKD3D_SHADER_TYPE_HULL: - vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationControl); -- spirv_compiler_emit_hull_shader_builtins(compiler); - break; - case VKD3D_SHADER_TYPE_DOMAIN: - vkd3d_spirv_set_execution_model(builder, SpvExecutionModelTessellationEvaluation); -@@ -6667,27 +6632,6 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi - tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); - } - --static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; -- -- /* INPUT and PATCHCONST are handled in spirv_compiler_emit_io_declarations(). -- * OUTPOINTID is handled in spirv_compiler_emit_hull_shader_builtins(). */ -- if (dst->reg.type != VKD3DSPR_INPUT && dst->reg.type != VKD3DSPR_PATCHCONST -- && dst->reg.type != VKD3DSPR_OUTPOINTID) -- spirv_compiler_emit_input_register(compiler, dst); --} -- --static void spirv_compiler_emit_dcl_output(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) --{ -- const struct vkd3d_shader_dst_param *dst = &instruction->declaration.dst; -- -- if (dst->reg.type != VKD3DSPR_OUTPUT && dst->reg.type != VKD3DSPR_PATCHCONST) -- spirv_compiler_emit_output_register(compiler, dst); --} -- - static void spirv_compiler_emit_dcl_stream(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) - { -@@ -10113,13 +10057,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_TGSM_STRUCTURED: - spirv_compiler_emit_dcl_tgsm_structured(compiler, instruction); - break; -- case VKD3DSIH_DCL_INPUT_PS: -- case VKD3DSIH_DCL_INPUT: -- spirv_compiler_emit_dcl_input(compiler, instruction); -- break; -- case VKD3DSIH_DCL_OUTPUT: -- spirv_compiler_emit_dcl_output(compiler, instruction); -- break; - case 
VKD3DSIH_DCL_STREAM: - spirv_compiler_emit_dcl_stream(compiler, instruction); - break; -@@ -10457,11 +10394,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - break; - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: -- case VKD3DSIH_DCL_INPUT_SGV: -- case VKD3DSIH_DCL_INPUT_SIV: -- case VKD3DSIH_DCL_INPUT_PS_SGV: -- case VKD3DSIH_DCL_INPUT_PS_SIV: -- case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_UAV_RAW: -@@ -10482,6 +10414,8 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - - static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - { -+ struct vkd3d_shader_dst_param dst; -+ - for (unsigned int i = 0; i < compiler->input_signature.element_count; ++i) - spirv_compiler_emit_input(compiler, VKD3DSPR_INPUT, i); - -@@ -10505,19 +10439,27 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - - if (compiler->program->has_point_size) - { -- struct vkd3d_shader_dst_param dst; -- - vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); - dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -- spirv_compiler_emit_output_register(compiler, &dst); -+ spirv_compiler_emit_io_register(compiler, &dst); - } - - if (compiler->program->has_point_coord) - { -- struct vkd3d_shader_dst_param dst; -- - vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); -- spirv_compiler_emit_input_register(compiler, &dst); -+ spirv_compiler_emit_io_register(compiler, &dst); -+ } -+ -+ for (unsigned int i = 0; i < sizeof(compiler->program->io_dcls) * CHAR_BIT; ++i) -+ { -+ /* For hull shaders we internally generate references to OUTPOINTID, -+ * so that must always be enabled. 
*/ -+ if (bitmap_is_set(compiler->program->io_dcls, i) -+ || (compiler->program->shader_version.type == VKD3D_SHADER_TYPE_HULL && i == VKD3DSPR_OUTPOINTID)) -+ { -+ vsir_dst_param_init(&dst, i, VKD3D_DATA_FLOAT, 0); -+ spirv_compiler_emit_io_register(compiler, &dst); -+ } - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 55b28cdd875..eb6d7f26a2c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -163,6 +163,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE = 5037, - VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING = 5038, - VKD3D_SHADER_ERROR_HLSL_MISPLACED_SAMPLER_STATE = 5039, -+ VKD3D_SHADER_ERROR_HLSL_AMBIGUOUS_CALL = 5040, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -1433,6 +1434,7 @@ struct vsir_program - enum vsir_control_flow_type cf_type; - enum vsir_normalisation_level normalisation_level; - enum vkd3d_tessellator_domain tess_domain; -+ uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; - - const char **block_names; - size_t block_name_count; --- -2.45.2 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-01117c716dea0e934ac594a7596d90ad948.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-01117c716dea0e934ac594a7596d90ad948.patch deleted file mode 100644 index 598a25d5..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-01117c716dea0e934ac594a7596d90ad948.patch +++ /dev/null @@ -1,302 +0,0 @@ -From 5e37d8d942ac5cc23c459cdd3fa86eec85a9216a Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 5 Dec 2024 09:55:52 +1100 -Subject: [PATCH] Updated vkd3d to 01117c716dea0e934ac594a7596d90ad94895d65. 
- ---- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 3 -- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 -- - libs/vkd3d/libs/vkd3d-shader/ir.c | 16 +++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 78 +++++++++++++++++++++++++++-- - 4 files changed, 90 insertions(+), 10 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index f0d24b835e5..e7518404aa0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -3203,13 +3203,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_LOG2] = "log2", - [HLSL_OP1_LOGIC_NOT] = "!", - [HLSL_OP1_NEG] = "-", -- [HLSL_OP1_NRM] = "nrm", - [HLSL_OP1_RCP] = "rcp", - [HLSL_OP1_REINTERPRET] = "reinterpret", - [HLSL_OP1_ROUND] = "round", - [HLSL_OP1_RSQ] = "rsq", - [HLSL_OP1_SAT] = "sat", -- [HLSL_OP1_SIGN] = "sign", - [HLSL_OP1_SIN] = "sin", - [HLSL_OP1_SIN_REDUCED] = "sin_reduced", - [HLSL_OP1_SQRT] = "sqrt", -@@ -3219,7 +3217,6 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP2_BIT_AND] = "&", - [HLSL_OP2_BIT_OR] = "|", - [HLSL_OP2_BIT_XOR] = "^", -- [HLSL_OP2_CRS] = "crs", - [HLSL_OP2_DIV] = "/", - [HLSL_OP2_DOT] = "dot", - [HLSL_OP2_EQUAL] = "==", -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index addc98d5a43..b899c16357c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -714,13 +714,11 @@ enum hlsl_ir_expr_op - HLSL_OP1_LOG2, - HLSL_OP1_LOGIC_NOT, - HLSL_OP1_NEG, -- HLSL_OP1_NRM, - HLSL_OP1_RCP, - HLSL_OP1_REINTERPRET, - HLSL_OP1_ROUND, - HLSL_OP1_RSQ, - HLSL_OP1_SAT, -- HLSL_OP1_SIGN, - HLSL_OP1_SIN, - HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ - HLSL_OP1_SQRT, -@@ -730,7 +728,6 @@ enum hlsl_ir_expr_op - HLSL_OP2_BIT_AND, - HLSL_OP2_BIT_OR, - HLSL_OP2_BIT_XOR, -- HLSL_OP2_CRS, - HLSL_OP2_DIV, - HLSL_OP2_DOT, - HLSL_OP2_EQUAL, -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c 
b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 64c9585af52..fbc3ac0f49d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -7195,6 +7195,7 @@ static void vsir_validate_register_without_indices(struct validation_context *ct - static void vsir_validate_io_register(struct validation_context *ctx, - const struct vkd3d_shader_register *reg) - { -+ unsigned int control_point_count = 0, control_point_index; - const struct shader_signature *signature; - bool has_control_point = false; - -@@ -7209,6 +7210,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, - case VKD3D_SHADER_TYPE_HULL: - case VKD3D_SHADER_TYPE_DOMAIN: - has_control_point = true; -+ control_point_count = ctx->program->input_control_point_count; - break; - - default: -@@ -7225,6 +7227,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, - { - signature = &ctx->program->output_signature; - has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; -+ control_point_count = ctx->program->output_control_point_count; - } - else - { -@@ -7241,11 +7244,13 @@ static void vsir_validate_io_register(struct validation_context *ctx, - case VKD3DSPR_INCONTROLPOINT: - signature = &ctx->program->input_signature; - has_control_point = true; -+ control_point_count = ctx->program->input_control_point_count; - break; - - case VKD3DSPR_OUTCONTROLPOINT: - signature = &ctx->program->output_signature; - has_control_point = true; -+ control_point_count = ctx->program->output_control_point_count; - break; - - case VKD3DSPR_PATCHCONST: -@@ -7262,6 +7267,8 @@ static void vsir_validate_io_register(struct validation_context *ctx, - * allowed to have a relative address. 
*/ - unsigned int expected_idx_count = 1 + !!has_control_point; - -+ control_point_index = 0; -+ - if (reg->idx_count != expected_idx_count) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -@@ -7280,7 +7287,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, - /* If the signature element is not an array, indices are - * [signature] or [control point, signature]. If the signature - * element is an array, indices are [array, signature] or -- * [control point, array, signature]. In any case `signature' is -+ * [array, control point, signature]. In any case `signature' is - * not allowed to have a relative address, while the others are. - */ - if (reg->idx_count < 1) -@@ -7314,6 +7321,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, - is_array = true; - - expected_idx_count = 1 + !!has_control_point + !!is_array; -+ control_point_index = !!is_array; - - if (reg->idx_count != expected_idx_count) - { -@@ -7323,6 +7331,12 @@ static void vsir_validate_io_register(struct validation_context *ctx, - return; - } - } -+ -+ if (has_control_point && !reg->idx[control_point_index].rel_addr -+ && reg->idx[control_point_index].offset >= control_point_count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Control point index %u exceeds the control point count %u in a register of type %#x.", -+ reg->idx[control_point_index].offset, control_point_count, reg->type); - } - - static void vsir_validate_temp_register(struct validation_context *ctx, -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 0406b8fbd51..f1ca581f1d2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -422,16 +422,25 @@ static void msl_dot(struct msl_generator *gen, const struct vkd3d_shader_instruc - - static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) - { -+ struct 
vkd3d_string_buffer *args; - struct msl_src src; - struct msl_dst dst; -+ unsigned int i; - uint32_t mask; - - mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -- msl_src_init(&src, gen, &ins->src[0], mask); -+ args = vkd3d_string_buffer_get(&gen->string_buffers); - -- msl_print_assignment(gen, &dst, "%s(%s)", op, src.str->buffer); -+ for (i = 0; i < ins->src_count; ++i) -+ { -+ msl_src_init(&src, gen, &ins->src[i], mask); -+ vkd3d_string_buffer_printf(args, "%s%s", i ? ", " : "", src.str->buffer); -+ msl_src_cleanup(&src, &gen->string_buffers); -+ } - -- msl_src_cleanup(&src, &gen->string_buffers); -+ msl_print_assignment(gen, &dst, "%s(%s)", op, args->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, args); - msl_dst_cleanup(&dst, &gen->string_buffers); - } - -@@ -477,6 +486,31 @@ static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instru - msl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const char *condition; -+ struct msl_src src; -+ -+ msl_src_init(&src, gen, &ins->src[0], VKD3DSP_WRITEMASK_0); -+ -+ msl_print_indent(gen->buffer, gen->indent); -+ condition = ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ ? 
"bool" : "!bool"; -+ vkd3d_string_buffer_printf(gen->buffer, "if (%s(%s))\n", condition, src.str->buffer); -+ -+ msl_src_cleanup(&src, &gen->string_buffers); -+ -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "{\n"); -+ ++gen->indent; -+} -+ -+static void msl_endif(struct msl_generator *gen) -+{ -+ --gen->indent; -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+} -+ - static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - struct msl_src src; -@@ -549,6 +583,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_DP4: - msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); - break; -+ case VKD3DSIH_ENDIF: -+ msl_endif(gen); -+ break; - case VKD3DSIH_IEQ: - msl_relop(gen, ins, "=="); - break; -@@ -567,9 +604,18 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_GEO: - msl_relop(gen, ins, ">="); - break; -+ case VKD3DSIH_IF: -+ msl_if(gen, ins); -+ break; - case VKD3DSIH_LTO: - msl_relop(gen, ins, "<"); - break; -+ case VKD3DSIH_MAX: -+ msl_intrinsic(gen, ins, "max"); -+ break; -+ case VKD3DSIH_MIN: -+ msl_intrinsic(gen, ins, "min"); -+ break; - case VKD3DSIH_INE: - case VKD3DSIH_NEU: - msl_relop(gen, ins, "!="); -@@ -578,6 +624,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_UTOF: - msl_cast(gen, ins, "float"); - break; -+ case VKD3DSIH_LOG: -+ msl_intrinsic(gen, ins, "log2"); -+ break; - case VKD3DSIH_MOV: - msl_mov(gen, ins); - break; -@@ -593,12 +642,21 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_RET: - msl_ret(gen, ins); - break; -+ case VKD3DSIH_ROUND_NE: -+ msl_intrinsic(gen, ins, "rint"); -+ break; -+ case VKD3DSIH_ROUND_NI: -+ msl_intrinsic(gen, ins, "floor"); -+ break; - case VKD3DSIH_ROUND_PI: - msl_intrinsic(gen, ins, "ceil"); - 
break; - case VKD3DSIH_ROUND_Z: - msl_intrinsic(gen, ins, "trunc"); - break; -+ case VKD3DSIH_RSQ: -+ msl_intrinsic(gen, ins, "rsqrt"); -+ break; - case VKD3DSIH_SQRT: - msl_intrinsic(gen, ins, "sqrt"); - break; -@@ -765,6 +823,16 @@ static void msl_generate_input_struct_declarations(struct msl_generator *gen) - - if (e->sysval_semantic) - { -+ if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) -+ { -+ if (type != VKD3D_SHADER_TYPE_PIXEL) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", type); -+ -+ msl_print_indent(gen->buffer, 1); -+ vkd3d_string_buffer_printf(buffer, "bool is_front_face [[front_facing]];\n"); -+ continue; -+ } - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled system value %#x.", e->sysval_semantic); - continue; -@@ -979,6 +1047,10 @@ static void msl_generate_entrypoint_prologue(struct msl_generator *gen) - vkd3d_string_buffer_printf(buffer, " = input.shader_in_%u", i); - msl_print_write_mask(buffer, e->mask); - } -+ else if (e->sysval_semantic == VKD3D_SHADER_SV_IS_FRONT_FACE) -+ { -+ vkd3d_string_buffer_printf(buffer, ".u = uint4(input.is_front_face ? 0xffffffffu : 0u, 0, 0, 0)"); -+ } - else - { - vkd3d_string_buffer_printf(buffer, " = ", e->sysval_semantic); --- -2.45.2 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-65b67e84a8ec23d4532166cebed86095414.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-65b67e84a8ec23d4532166cebed86095414.patch deleted file mode 100644 index f81ac489..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-65b67e84a8ec23d4532166cebed86095414.patch +++ /dev/null @@ -1,4053 +0,0 @@ -From 6a620cd5b95e2ee718b9b388204a1bf4d641dfc2 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 11 Dec 2024 08:32:12 +1100 -Subject: [PATCH] Updated vkd3d to 65b67e84a8ec23d4532166cebed86095414e1536. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 38 +- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 35 ++ - libs/vkd3d/libs/vkd3d-shader/dxil.c | 5 + - libs/vkd3d/libs/vkd3d-shader/fx.c | 12 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 39 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 503 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 334 ++++++++++-- - libs/vkd3d/libs/vkd3d-shader/msl.c | 69 ++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 362 ++++++++++++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 479 +++++++---------- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 8 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 36 ++ - libs/vkd3d/libs/vkd3d/command.c | 273 +++++++--- - libs/vkd3d/libs/vkd3d/device.c | 58 +- - libs/vkd3d/libs/vkd3d/state.c | 237 +++++---- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 63 ++- - 16 files changed, 1903 insertions(+), 648 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 7ffd060d833..e7dd65d1fef 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1570,9 +1570,14 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - vkd3d_unreachable(); - } - --D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) -+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler) - { -- switch (type->class) -+ enum hlsl_type_class class = type->class; -+ -+ if (is_combined_sampler) -+ class = HLSL_CLASS_TEXTURE; -+ -+ switch (class) - { - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -@@ -1639,7 +1644,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - break; - - case HLSL_CLASS_ARRAY: -- return hlsl_sm1_base_type(type->e.array.type); -+ return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler); - - case HLSL_CLASS_STRUCT: - return D3DXPT_VOID; -@@ -1677,7 +1682,8 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - 
vkd3d_unreachable(); - } - --static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start) -+static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, -+ struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start) - { - const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type); - unsigned int array_size = hlsl_get_multiarray_size(type); -@@ -1697,7 +1703,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ - struct hlsl_struct_field *field = &array_type->e.record.fields[i]; - - field->name_bytecode_offset = put_string(buffer, field->name); -- write_sm1_type(buffer, field->type, ctab_start); -+ write_sm1_type(buffer, field->type, false, ctab_start); - } - - fields_offset = bytecode_align(buffer) - ctab_start; -@@ -1711,7 +1717,8 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ - } - } - -- type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); -+ type->bytecode_offset = put_u32(buffer, -+ vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler))); - put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, field_count)); - put_u32(buffer, fields_offset); -@@ -1748,7 +1755,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) - list_move_tail(&ctx->extern_vars, &sorted); - } - --void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) - { - size_t ctab_offset, ctab_start, ctab_end, vars_offset, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; -@@ -1836,7 +1843,7 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff - name_offset = put_string(buffer, var->name); - set_u32(buffer, var_offset, 
name_offset - ctab_start); - -- write_sm1_type(buffer, var->data_type, ctab_start); -+ write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); - - if (var->default_values) -@@ -1907,6 +1914,21 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff - set_u32(buffer, size_offset, vkd3d_make_u32(VKD3D_SM1_OP_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); - } - -+void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab) -+{ -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ -+ write_sm1_uniforms(ctx, &buffer); -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ ctab->code = buffer.data; -+ ctab->size = buffer.size; -+} -+ - static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) - { - return ((type << VKD3D_SM1_REGISTER_TYPE_SHIFT) & VKD3D_SM1_REGISTER_TYPE_MASK) -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index f6ac8e0829e..81af62f7810 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -115,6 +115,14 @@ static uint32_t read_u32(const char **ptr) - return ret; - } - -+static uint64_t read_u64(const char **ptr) -+{ -+ uint64_t ret; -+ memcpy(&ret, *ptr, sizeof(ret)); -+ *ptr += sizeof(ret); -+ return ret; -+} -+ - static float read_float(const char **ptr) - { - union -@@ -502,6 +510,28 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - return ret; - } - -+static int shdr_parse_features(const struct vkd3d_shader_dxbc_section_desc *section, -+ struct vkd3d_shader_message_context *message_context, struct vsir_features *f) -+{ -+ const char *data = section->data.code; -+ const char *ptr = data; -+ uint64_t flags; -+ -+ if (!require_space(0, 1, sizeof(uint64_t), section->data.size)) -+ { -+ WARN("Invalid data 
size %#zx.\n", section->data.size); -+ vkd3d_shader_error(message_context, NULL, VKD3D_SHADER_ERROR_DXBC_INVALID_CHUNK_SIZE, -+ "SFI0 section size %zu is too small to contain flags.\n", section->data.size); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ flags = read_u64(&ptr); -+ -+ if (flags & DXBC_SFI0_REQUIRES_ROVS) -+ f->rovs = true; -+ -+ return VKD3D_OK; -+} -+ - static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - struct vkd3d_shader_message_context *message_context, void *context) - { -@@ -558,6 +588,11 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, - desc->byte_code_size = section->data.size; - break; - -+ case TAG_SFI0: -+ if ((ret = shdr_parse_features(section, message_context, &desc->features)) < 0) -+ return ret; -+ break; -+ - case TAG_AON9: - TRACE("Skipping AON9 shader code chunk.\n"); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 71f3c7f17b0..d76f9bcc772 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -9745,6 +9745,8 @@ static void sm6_parser_emit_dcl_tessellator_partitioning(struct sm6_parser *sm6, - - ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_PARTITIONING); - ins->declaration.tessellator_partitioning = tessellator_partitioning; -+ -+ sm6->p.program->tess_partitioning = tessellator_partitioning; - } - - static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser *sm6, -@@ -9761,6 +9763,8 @@ static void sm6_parser_emit_dcl_tessellator_output_primitive(struct sm6_parser * - - ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE); - ins->declaration.tessellator_output_primitive = primitive; -+ -+ sm6->p.program->tess_output_primitive = primitive; - } - - static void sm6_parser_emit_dcl_max_tessellation_factor(struct sm6_parser *sm6, struct sm6_metadata_value *m) -@@ -10379,6 +10383,7 @@ static enum vkd3d_result 
sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - *input_signature = dxbc_desc->input_signature; - *output_signature = dxbc_desc->output_signature; - *patch_constant_signature = dxbc_desc->patch_constant_signature; -+ program->features = dxbc_desc->features; - memset(dxbc_desc, 0, sizeof(*dxbc_desc)); - - block = &sm6->root_block; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 064e15c4b60..3795add87c7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -1021,8 +1021,8 @@ static uint32_t get_fx_2_type_class(const struct hlsl_type *type) - return hlsl_sm1_class(type); - } - --static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, -- struct fx_write_context *fx) -+static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, -+ const struct hlsl_semantic *semantic, bool is_combined_sampler, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - uint32_t semantic_offset, offset, elements_count = 0, name_offset; -@@ -1038,7 +1038,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - name_offset = write_string(name, fx); - semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; - -- offset = put_u32(buffer, hlsl_sm1_base_type(type)); -+ offset = put_u32(buffer, hlsl_sm1_base_type(type, is_combined_sampler)); - put_u32(buffer, get_fx_2_type_class(type)); - put_u32(buffer, name_offset); - put_u32(buffer, semantic_offset); -@@ -1074,7 +1074,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - - /* Validated in check_invalid_object_fields(). 
*/ - VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); -- write_fx_2_parameter(field->type, field->name, &field->semantic, fx); -+ write_fx_2_parameter(field->type, field->name, &field->semantic, false, fx); - } - } - -@@ -1335,7 +1335,7 @@ static void write_fx_2_parameters(struct fx_write_context *fx) - if (!is_type_supported_fx_2(ctx, var->data_type, &var->loc)) - continue; - -- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); -+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); - value_offset = write_fx_2_initial_value(var, fx); - - flags = 0; -@@ -1358,7 +1358,7 @@ static void write_fx_2_annotation(struct hlsl_ir_var *var, struct fx_write_conte - struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t desc_offset, value_offset; - -- desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); -+ desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, var->is_combined_sampler, fx); - value_offset = write_fx_2_initial_value(var, fx); - - put_u32(buffer, desc_offset); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index b899c16357c..5f05ceda004 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -535,6 +535,10 @@ struct hlsl_ir_var - * element of a struct, and thus needs to be aligned when packed in the signature. */ - bool force_align; - -+ /* Whether this is a sampler that was created from the combination of a -+ * sampler and a texture for SM<4 backwards compatibility. 
*/ -+ bool is_combined_sampler; -+ - uint32_t is_input_semantic : 1; - uint32_t is_output_semantic : 1; - uint32_t is_uniform : 1; -@@ -1643,16 +1647,35 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - struct hlsl_block *block, void *context); - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); --D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); -+D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler); -+ -+void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab); -+ -+struct extern_resource -+{ -+ /* "var" is only not NULL if this resource is a whole variable, so it may -+ * be responsible for more than one component. */ -+ const struct hlsl_ir_var *var; -+ const struct hlsl_buffer *buffer; -+ -+ char *name; -+ bool is_user_packed; -+ -+ /* The data type of a single component of the resource. This might be -+ * different from the data type of the resource itself in 4.0 profiles, -+ * where an array (or multi-dimensional array) is handled as a single -+ * resource, unlike in 5.0. 
*/ -+ struct hlsl_type *component_type; -+ -+ enum hlsl_regset regset; -+ unsigned int id, space, index, bind_count; - --void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); --int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -- const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -+ struct vkd3d_shader_location loc; -+}; - --int tpf_compile(struct vsir_program *program, uint64_t config_flags, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -+struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count); -+void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count); -+void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef); - - enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, - unsigned int storage_modifiers); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 8f45628dbee..e6924aa70ef 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -2790,6 +2790,108 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n - - return true; - } -+ -+static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type) -+{ -+ struct hlsl_type *sampler_type; -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ { -+ if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type))) -+ return NULL; -+ -+ return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count); -+ } -+ -+ return ctx->builtin_types.sampler[type->sampler_dim]; -+} -+ -+static bool 
deref_offset_is_zero(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) -+{ -+ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); -+ unsigned int index; -+ -+ if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index)) -+ return false; -+ return index == 0; -+} -+ -+/* Lower samples from separate texture and sampler variables to samples from -+ * synthetized combined samplers. That is, translate SM4-style samples in the -+ * source to SM1-style samples in the bytecode. */ -+static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_var *var, *resource, *sampler; -+ struct hlsl_ir_resource_load *load; -+ struct vkd3d_string_buffer *name; -+ struct hlsl_type *sampler_type; -+ -+ if (instr->type != HLSL_IR_RESOURCE_LOAD) -+ return false; -+ load = hlsl_ir_resource_load(instr); -+ -+ if (load->load_type != HLSL_RESOURCE_SAMPLE -+ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD -+ && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS) -+ return false; -+ -+ if (!load->sampler.var) -+ return false; -+ resource = load->resource.var; -+ sampler = load->sampler.var; -+ -+ VKD3D_ASSERT(hlsl_type_is_resource(resource->data_type)); -+ VKD3D_ASSERT(hlsl_type_is_resource(sampler->data_type)); -+ if (sampler->data_type->class == HLSL_CLASS_ARRAY && !deref_offset_is_zero(ctx, &load->sampler)) -+ { -+ /* Not supported by d3dcompiler. 
*/ -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, -+ "Lower separated samples with sampler arrays."); -+ return false; -+ } -+ if (!resource->is_uniform) -+ return false; -+ if(!sampler->is_uniform) -+ return false; -+ -+ if (!(name = hlsl_get_string_buffer(ctx))) -+ return false; -+ vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name); -+ -+ TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer)); -+ -+ if (!(var = hlsl_get_var(ctx->globals, name->buffer))) -+ { -+ if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type))) -+ { -+ hlsl_release_string_buffer(ctx, name); -+ return false; -+ } -+ -+ if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, sampler_type, &instr->loc, false))) -+ { -+ hlsl_release_string_buffer(ctx, name); -+ return false; -+ } -+ var->storage_modifiers |= HLSL_STORAGE_UNIFORM; -+ var->is_combined_sampler = true; -+ var->is_uniform = 1; -+ -+ list_remove(&var->scope_entry); -+ list_add_after(&sampler->scope_entry, &var->scope_entry); -+ -+ list_add_after(&sampler->extern_entry, &var->extern_entry); -+ } -+ hlsl_release_string_buffer(ctx, name); -+ -+ /* Only change the deref's var, keep the path. */ -+ load->resource.var = var; -+ hlsl_cleanup_deref(&load->sampler); -+ load->sampler.var = NULL; -+ -+ return true; -+} -+ - /* Lower combined samples and sampler variables to synthesized separated textures and samplers. - * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ - static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2901,6 +3003,27 @@ static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl - list_add_tail(list, &to_add->extern_entry); - } - -+static bool sort_synthetic_combined_samplers_first(struct hlsl_ctx *ctx) -+{ -+ struct list separated_resources; -+ struct hlsl_ir_var *var, *next; -+ -+ list_init(&separated_resources); -+ -+ LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_combined_sampler) -+ { -+ list_remove(&var->extern_entry); -+ insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_SAMPLERS); -+ } -+ } -+ -+ list_move_head(&ctx->extern_vars, &separated_resources); -+ -+ return false; -+} -+ - static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) - { - struct list separated_resources; -@@ -5096,7 +5219,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - } - } - --static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort) -+static void sort_uniform_by_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort, enum hlsl_regset regset) - { - struct hlsl_ir_var *var; - -@@ -5104,8 +5227,8 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ - - LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry) - { -- uint32_t to_sort_size = to_sort->bind_count[HLSL_REGSET_NUMERIC]; -- uint32_t var_size = var->bind_count[HLSL_REGSET_NUMERIC]; -+ uint32_t to_sort_size = to_sort->bind_count[regset]; -+ uint32_t var_size = var->bind_count[regset]; - - if (to_sort_size > var_size) - { -@@ -5117,7 +5240,7 @@ static void sort_uniform_by_numeric_bind_count(struct list *sorted, struct hlsl_ - list_add_tail(sorted, &to_sort->extern_entry); - } - --static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) -+static void 
sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset regset) - { - struct list sorted = LIST_INIT(sorted); - struct hlsl_ir_var *var, *next; -@@ -5125,7 +5248,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) - LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_uniform) -- sort_uniform_by_numeric_bind_count(&sorted, var); -+ sort_uniform_by_bind_count(&sorted, var, regset); - } - list_move_tail(&ctx->extern_vars, &sorted); - } -@@ -5173,7 +5296,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -- sort_uniforms_by_numeric_bind_count(ctx); -+ sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC); - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -7884,10 +8007,9 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo - } - - static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -- uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+ uint64_t config_flags, struct vsir_program *program) - { - struct vkd3d_shader_version version = {0}; -- struct vkd3d_bytecode_buffer buffer = {0}; - struct hlsl_block block; - - version.major = ctx->profile->major_version; -@@ -7899,16 +8021,6 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - return; - } - -- write_sm1_uniforms(ctx, &buffer); -- if (buffer.status) -- { -- vkd3d_free(buffer.data); -- ctx->result = buffer.status; -- return; -- } -- ctab->code = buffer.data; -- ctab->size = buffer.size; -- - generate_vsir_signature(ctx, program, entry_func); - - hlsl_block_init(&block); -@@ -9473,6 +9585,292 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); - } 
- -+static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program) -+{ -+ struct extern_resource *extern_resources; -+ unsigned int extern_resources_count; -+ -+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -+ for (unsigned int i = 0; i < extern_resources_count; ++i) -+ { -+ if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) -+ program->features.rovs = true; -+ } -+ sm4_free_extern_resources(extern_resources, extern_resources_count); -+ -+ /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, -+ * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */ -+} -+ -+static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_function_decl *entry_func) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ struct extern_resource *extern_resources; -+ unsigned int extern_resources_count, i; -+ -+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -+ -+ if (version->major == 4) -+ { -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ const struct hlsl_type *type = resource->component_type; -+ -+ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ program->global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; -+ break; -+ } -+ } -+ } -+ -+ sm4_free_extern_resources(extern_resources, extern_resources_count); -+ -+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -+ program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; -+} -+ -+static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_buffer *cbuffer) -+{ -+ unsigned int array_first = cbuffer->reg.index; -+ unsigned int array_last = cbuffer->reg.index; /* 
FIXME: array end. */ -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &cbuffer->loc, VKD3DSIH_DCL_CONSTANT_BUFFER, 0, 0))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ ins->declaration.cb.size = cbuffer->size; -+ -+ src_param = &ins->declaration.cb.src; -+ vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ ins->declaration.cb.range.space = cbuffer->reg.space; -+ ins->declaration.cb.range.first = array_first; -+ ins->declaration.cb.range.last = array_last; -+ -+ src_param->reg.idx[0].offset = cbuffer->reg.id; -+ src_param->reg.idx[1].offset = array_first; -+ src_param->reg.idx[2].offset = array_last; -+ src_param->reg.idx_count = 3; -+} -+ -+static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct extern_resource *resource) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i; -+ -+ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); -+ VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); -+ -+ for (i = 0; i < resource->bind_count; ++i) -+ { -+ unsigned int array_first = resource->index + i; -+ unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ -+ -+ if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -+ continue; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, VKD3DSIH_DCL_SAMPLER, 0, 0))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) -+ ins->flags |= VKD3DSI_SAMPLER_COMPARISON_MODE; -+ -+ src_param = &ins->declaration.sampler.src; -+ vsir_src_param_init(src_param, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 0); -+ -+ ins->declaration.sampler.range.first = array_first; -+ ins->declaration.sampler.range.last = array_last; -+ ins->declaration.sampler.range.space = resource->space; -+ -+ src_param->reg.idx[0].offset = resource->id; -+ src_param->reg.idx[1].offset = array_first; -+ src_param->reg.idx[2].offset = array_last; -+ src_param->reg.idx_count = 3; -+ } -+} -+ -+static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type) -+{ -+ switch (type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_1D: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_1D; -+ case HLSL_SAMPLER_DIM_2D: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ case HLSL_SAMPLER_DIM_3D: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_3D; -+ case HLSL_SAMPLER_DIM_CUBE: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_CUBE; -+ case HLSL_SAMPLER_DIM_1DARRAY: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY; -+ case HLSL_SAMPLER_DIM_2DARRAY: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -+ case HLSL_SAMPLER_DIM_2DMS: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_2DMS; -+ case HLSL_SAMPLER_DIM_2DMSARRAY: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY; -+ case HLSL_SAMPLER_DIM_CUBEARRAY: -+ return VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY; -+ case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ return VKD3D_SHADER_RESOURCE_BUFFER; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static enum vkd3d_data_type 
sm4_generate_vsir_get_format_type(const struct hlsl_type *type) -+{ -+ const struct hlsl_type *format = type->e.resource.format; -+ -+ switch (format->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ return VKD3D_DATA_DOUBLE; -+ -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_HALF: -+ if (format->modifiers & HLSL_MODIFIER_UNORM) -+ return VKD3D_DATA_UNORM; -+ if (format->modifiers & HLSL_MODIFIER_SNORM) -+ return VKD3D_DATA_SNORM; -+ return VKD3D_DATA_FLOAT; -+ -+ case HLSL_TYPE_INT: -+ return VKD3D_DATA_INT; -+ break; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ return VKD3D_DATA_UINT; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct extern_resource *resource, -+ bool uav) -+{ -+ enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; -+ struct vkd3d_shader_structured_resource *structured_resource; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_semantic *semantic; -+ struct vkd3d_shader_instruction *ins; -+ struct hlsl_type *component_type; -+ enum vkd3d_shader_opcode opcode; -+ bool multisampled; -+ unsigned int i, j; -+ -+ VKD3D_ASSERT(resource->regset == regset); -+ VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1); -+ -+ component_type = resource->component_type; -+ -+ for (i = 0; i < resource->bind_count; ++i) -+ { -+ unsigned int array_first = resource->index + i; -+ unsigned int array_last = resource->index + i; /* FIXME: array end. 
*/ -+ -+ if (resource->var && !resource->var->objects_usage[regset][i].used) -+ continue; -+ -+ if (uav) -+ { -+ switch (component_type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ opcode = VKD3DSIH_DCL_UAV_STRUCTURED; -+ break; -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ opcode = VKD3DSIH_DCL_UAV_RAW; -+ break; -+ default: -+ opcode = VKD3DSIH_DCL_UAV_TYPED; -+ break; -+ } -+ } -+ else -+ { -+ switch (component_type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ opcode = VKD3DSIH_DCL_RESOURCE_RAW; -+ break; -+ default: -+ opcode = VKD3DSIH_DCL; -+ break; -+ } -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ semantic = &ins->declaration.semantic; -+ structured_resource = &ins->declaration.structured_resource; -+ dst_param = &semantic->resource.reg; -+ vsir_dst_param_init(dst_param, uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0); -+ -+ if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; -+ if (uav && component_type->e.resource.rasteriser_ordered) -+ ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW; -+ -+ multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; -+ -+ if (!hlsl_version_ge(ctx, 4, 1) && multisampled && !component_type->sample_count) -+ { -+ hlsl_error(ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Multisampled texture object declaration needs sample count for profile %u.%u.", -+ ctx->profile->major_version, ctx->profile->minor_version); -+ } -+ -+ for (j = 0; j < 4; ++j) -+ semantic->resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type); -+ -+ semantic->resource.range.first = array_first; -+ semantic->resource.range.last = array_last; -+ 
semantic->resource.range.space = resource->space; -+ -+ dst_param->reg.idx[0].offset = resource->id; -+ dst_param->reg.idx[1].offset = array_first; -+ dst_param->reg.idx[2].offset = array_last; -+ dst_param->reg.idx_count = 3; -+ -+ ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type); -+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ ins->raw = true; -+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -+ { -+ ins->structured = true; -+ ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC]; -+ } -+ -+ if (multisampled) -+ semantic->sample_count = component_type->sample_count; -+ } -+} -+ - /* OBJECTIVE: Translate all the information from ctx and entry_func to the - * vsir_program, so it can be used as input to tpf_compile() without relying - * on ctx and entry_func. */ -@@ -9480,6 +9878,9 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - uint64_t config_flags, struct vsir_program *program) - { - struct vkd3d_shader_version version = {0}; -+ struct extern_resource *extern_resources; -+ unsigned int extern_resources_count; -+ const struct hlsl_buffer *cbuffer; - - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; -@@ -9501,6 +9902,39 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - program->thread_group_size.y = ctx->thread_count[1]; - program->thread_group_size.z = ctx->thread_count[2]; - } -+ else if (version.type == VKD3D_SHADER_TYPE_HULL) -+ { -+ program->input_control_point_count = 1; /* TODO: Obtain from InputPatch */ -+ program->output_control_point_count = ctx->output_control_point_count; -+ program->tess_domain = ctx->domain; -+ program->tess_partitioning = ctx->partitioning; -+ program->tess_output_primitive = ctx->output_primitive; -+ } -+ else if (version.type == VKD3D_SHADER_TYPE_DOMAIN) -+ { -+ 
program->input_control_point_count = 0; /* TODO: Obtain from OutputPatch */ -+ program->tess_domain = ctx->domain; -+ } -+ -+ LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (cbuffer->reg.allocated) -+ sm4_generate_vsir_add_dcl_constant_buffer(ctx, program, cbuffer); -+ } -+ -+ extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -+ for (unsigned int i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ -+ if (resource->regset == HLSL_REGSET_SAMPLERS) -+ sm4_generate_vsir_add_dcl_sampler(ctx, program, resource); -+ else if (resource->regset == HLSL_REGSET_TEXTURES) -+ sm4_generate_vsir_add_dcl_texture(ctx, program, resource, false); -+ else if (resource->regset == HLSL_REGSET_UAVS) -+ sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true); -+ } -+ sm4_free_extern_resources(extern_resources, extern_resources_count); - - if (version.type == VKD3D_SHADER_TYPE_HULL) - generate_vsir_add_program_instruction(ctx, program, -@@ -9512,6 +9946,9 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); - sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); - } -+ -+ generate_vsir_scan_required_features(ctx, program); -+ generate_vsir_scan_global_flags(ctx, program, func); - } - - static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -@@ -9986,9 +10423,13 @@ static void process_entry_function(struct hlsl_ctx *ctx, - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - -+ if (hlsl_version_lt(ctx, 4, 0)) -+ hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); -+ - hlsl_transform_ir(ctx, validate_dereferences, body, NULL); - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); -- if (profile->major_version >= 4) -+ -+ if 
(hlsl_version_ge(ctx, 4, 0)) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); - - do -@@ -9996,7 +10437,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, - while (hlsl_transform_ir(ctx, dce, body, NULL)); - - hlsl_transform_ir(ctx, track_components_usage, body, NULL); -- sort_synthetic_separated_samplers_first(ctx); -+ if (hlsl_version_lt(ctx, 4, 0)) -+ sort_synthetic_combined_samplers_first(ctx); -+ else -+ sort_synthetic_separated_samplers_first(ctx); - - if (profile->major_version < 4) - { -@@ -10101,14 +10545,16 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - mark_indexable_vars(ctx, entry_func); - allocate_temp_registers(ctx, entry_func); - allocate_const_registers(ctx, entry_func); -+ sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); - } - else - { - allocate_buffers(ctx); - allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES); - allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS); -+ allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); - } -- allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); - - if (TRACE_ON()) - rb_for_each_entry(&ctx->functions, dump_function, ctx); -@@ -10125,7 +10571,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - struct vsir_program program; - int result; - -- sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); -+ sm1_generate_ctab(ctx, &ctab); -+ if (ctx->result) -+ return ctx->result; -+ -+ sm1_generate_vsir(ctx, entry_func, config_flags, &program); - if (ctx->result) - { - vsir_program_cleanup(&program); -@@ -10142,18 +10592,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - case VKD3D_SHADER_TARGET_DXBC_TPF: - { - uint32_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vkd3d_shader_code rdef = {0}; - struct vsir_program program; - int result; - -+ sm4_generate_rdef(ctx, &rdef); -+ if 
(ctx->result) -+ return ctx->result; -+ - sm4_generate_vsir(ctx, entry_func, config_flags, &program); - if (ctx->result) - { - vsir_program_cleanup(&program); -+ vkd3d_shader_free_shader_code(&rdef); - return ctx->result; - } - -- result = tpf_compile(&program, config_flags, out, ctx->message_context, ctx, entry_func); -+ result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context); - vsir_program_cleanup(&program); -+ vkd3d_shader_free_shader_code(&rdef); - return result; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index fbc3ac0f49d..b3442ec92ae 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -7150,6 +7150,20 @@ struct validation_context - - unsigned int outer_tess_idxs[4]; - unsigned int inner_tess_idxs[2]; -+ -+ struct validation_context_signature_data -+ { -+ struct validation_context_signature_stream_data -+ { -+ struct validation_context_signature_register_data -+ { -+ struct validation_context_signature_component_data -+ { -+ const struct signature_element *element; -+ } components[VKD3D_VEC4_SIZE]; -+ } registers[MAX_REG_OUTPUT]; -+ } streams[VKD3D_MAX_STREAM_COUNT]; -+ } input_signature_data, output_signature_data, patch_constant_signature_data; - }; - - static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, -@@ -7192,31 +7206,27 @@ static void vsir_validate_register_without_indices(struct validation_context *ct - reg->idx_count, reg->type); - } - --static void vsir_validate_io_register(struct validation_context *ctx, -- const struct vkd3d_shader_register *reg) -+static const struct shader_signature *vsir_signature_from_register_type(struct validation_context *ctx, -+ enum vkd3d_shader_register_type register_type, bool *has_control_point, unsigned int *control_point_count) - { -- unsigned int control_point_count = 0, control_point_index; -- const struct shader_signature *signature; -- bool has_control_point = false; -+ 
*has_control_point = false; - -- switch (reg->type) -+ switch (register_type) - { - case VKD3DSPR_INPUT: -- signature = &ctx->program->input_signature; -- - switch (ctx->program->shader_version.type) - { - case VKD3D_SHADER_TYPE_GEOMETRY: - case VKD3D_SHADER_TYPE_HULL: - case VKD3D_SHADER_TYPE_DOMAIN: -- has_control_point = true; -- control_point_count = ctx->program->input_control_point_count; -+ *has_control_point = true; -+ *control_point_count = ctx->program->input_control_point_count; - break; - - default: - break; - } -- break; -+ return &ctx->program->input_signature; - - case VKD3DSPR_OUTPUT: - switch (ctx->program->shader_version.type) -@@ -7225,41 +7235,45 @@ static void vsir_validate_io_register(struct validation_context *ctx, - if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE - || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) - { -- signature = &ctx->program->output_signature; -- has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; -- control_point_count = ctx->program->output_control_point_count; -+ *has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; -+ *control_point_count = ctx->program->output_control_point_count; -+ return &ctx->program->output_signature; - } - else - { -- signature = &ctx->program->patch_constant_signature; -+ return &ctx->program->patch_constant_signature; - } -- break; - - default: -- signature = &ctx->program->output_signature; -- break; -+ return &ctx->program->output_signature; - } -- break; - - case VKD3DSPR_INCONTROLPOINT: -- signature = &ctx->program->input_signature; -- has_control_point = true; -- control_point_count = ctx->program->input_control_point_count; -- break; -+ *has_control_point = true; -+ *control_point_count = ctx->program->input_control_point_count; -+ return &ctx->program->input_signature; - - case VKD3DSPR_OUTCONTROLPOINT: -- signature = &ctx->program->output_signature; -- has_control_point = true; 
-- control_point_count = ctx->program->output_control_point_count; -- break; -+ *has_control_point = true; -+ *control_point_count = ctx->program->output_control_point_count; -+ return &ctx->program->output_signature; - - case VKD3DSPR_PATCHCONST: -- signature = &ctx->program->patch_constant_signature; -- break; -+ return &ctx->program->patch_constant_signature; - - default: -- vkd3d_unreachable(); -+ return NULL; - } -+} -+ -+static void vsir_validate_io_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) -+{ -+ unsigned int control_point_index, control_point_count; -+ const struct shader_signature *signature; -+ bool has_control_point; -+ -+ signature = vsir_signature_from_register_type(ctx, reg->type, &has_control_point, &control_point_count); -+ VKD3D_ASSERT(signature); - - if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) - { -@@ -7330,6 +7344,11 @@ static void vsir_validate_io_register(struct validation_context *ctx, - reg->idx_count, reg->type); - return; - } -+ -+ if (is_array && !reg->idx[0].rel_addr && reg->idx[0].offset >= element->register_count) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Array index %u exceeds the signature element register count %u in a register of type %#x.", -+ reg->idx[0].offset, element->register_count, reg->type); - } - - if (has_control_point && !reg->idx[control_point_index].rel_addr -@@ -7642,8 +7661,26 @@ static void vsir_validate_register(struct validation_context *ctx, - for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i) - { - const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr; -- if (reg->idx[i].rel_addr) -+ if (param) -+ { - vsir_validate_src_param(ctx, param); -+ -+ switch (param->reg.type) -+ { -+ case VKD3DSPR_TEMP: -+ case VKD3DSPR_SSA: -+ case VKD3DSPR_ADDR: -+ case VKD3DSPR_LOOP: -+ case VKD3DSPR_OUTPOINTID: -+ break; -+ -+ default: -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid 
register type %#x for a relative address parameter.", -+ param->reg.type); -+ break; -+ } -+ } - } - - switch (reg->type) -@@ -8025,19 +8062,32 @@ sysval_validation_data[] = - }; - - static void vsir_validate_signature_element(struct validation_context *ctx, -- const struct shader_signature *signature, enum vsir_signature_type signature_type, -- unsigned int idx) -+ const struct shader_signature *signature, struct validation_context_signature_data *signature_data, -+ enum vsir_signature_type signature_type, unsigned int idx) - { - enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; -+ bool integer_type = false, is_outer = false, is_gs_output, require_index = true; - const char *signature_type_name = signature_type_names[signature_type]; - const struct signature_element *element = &signature->elements[idx]; -- bool integer_type = false, is_outer = false; -- unsigned int semantic_index_max = 0; -+ unsigned int semantic_index_max = 0, i, j; - - if (element->register_count == 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid zero register count.", idx, signature_type_name); - -+ if (element->register_index != UINT_MAX && (element->register_index >= MAX_REG_OUTPUT -+ || MAX_REG_OUTPUT - element->register_index < element->register_count)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid register index %u and count %u.", -+ idx, signature_type_name, element->register_index, element->register_count); -+ -+ is_gs_output = ctx->program->shader_version.type == VKD3D_SHADER_TYPE_GEOMETRY -+ && signature_type == SIGNATURE_TYPE_OUTPUT; -+ if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || (element->stream_index != 0 && !is_gs_output)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid stream index %u.", -+ idx, signature_type_name, element->stream_index); -+ - if 
(element->mask == 0 || (element->mask & ~0xf)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid mask %#x.", idx, signature_type_name, element->mask); -@@ -8074,6 +8124,9 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - switch (element->sysval_semantic) - { - case VKD3D_SHADER_SV_NONE: -+ case VKD3D_SHADER_SV_TARGET: -+ break; -+ - case VKD3D_SHADER_SV_POSITION: - case VKD3D_SHADER_SV_CLIP_DISTANCE: - case VKD3D_SHADER_SV_CULL_DISTANCE: -@@ -8084,12 +8137,12 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - case VKD3D_SHADER_SV_INSTANCE_ID: - case VKD3D_SHADER_SV_IS_FRONT_FACE: - case VKD3D_SHADER_SV_SAMPLE_INDEX: -- case VKD3D_SHADER_SV_TARGET: - case VKD3D_SHADER_SV_DEPTH: - case VKD3D_SHADER_SV_COVERAGE: - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: - case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: - case VKD3D_SHADER_SV_STENCIL_REF: -+ require_index = false; - break; - - case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: -@@ -8130,6 +8183,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - break; - } - -+ if (require_index && element->register_index == UINT_MAX) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: System value semantic %#x requires a register index.", -+ idx, signature_type_name, element->sysval_semantic); -+ - if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) - { - if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) -@@ -8240,6 +8298,31 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, - "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", - idx, signature_type_name, element->interpolation_mode); -+ -+ if (element->stream_index >= VKD3D_MAX_STREAM_COUNT || !require_index) -+ return; -+ -+ for (i = element->register_index; i < 
MAX_REG_OUTPUT -+ && i - element->register_index < element->register_count; ++i) -+ { -+ struct validation_context_signature_stream_data *stream_data = &signature_data->streams[element->stream_index]; -+ struct validation_context_signature_register_data *register_data = &stream_data->registers[i]; -+ -+ for (j = 0; j < VKD3D_VEC4_SIZE; ++j) -+ { -+ struct validation_context_signature_component_data *component_data = ®ister_data->components[j]; -+ -+ if (!(element->mask & (1u << j))) -+ continue; -+ -+ if (!component_data->element) -+ component_data->element = element; -+ else -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Conflict with element %zu.", -+ idx, signature_type_name, component_data->element - signature->elements); -+ } -+ } - } - - static const unsigned int allowed_signature_phases[] = -@@ -8249,8 +8332,8 @@ static const unsigned int allowed_signature_phases[] = - [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, - }; - --static void vsir_validate_signature(struct validation_context *ctx, -- const struct shader_signature *signature, enum vsir_signature_type signature_type) -+static void vsir_validate_signature(struct validation_context *ctx, const struct shader_signature *signature, -+ struct validation_context_signature_data *signature_data, enum vsir_signature_type signature_type) - { - unsigned int i; - -@@ -8260,7 +8343,7 @@ static void vsir_validate_signature(struct validation_context *ctx, - "Unexpected %s signature.", signature_type_names[signature_type]); - - for (i = 0; i < signature->element_count; ++i) -- vsir_validate_signature_element(ctx, signature, signature_type, i); -+ vsir_validate_signature_element(ctx, signature, signature_data, signature_type, i); - - if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) - { -@@ -8475,6 +8558,169 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, - instruction->declaration.max_tessellation_factor); - } - -+static 
void vsir_validate_dcl_index_range(struct validation_context *ctx, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ unsigned int i, j, base_register_idx, effective_write_mask = 0, control_point_count, first_component = UINT_MAX; -+ const struct vkd3d_shader_index_range *range = &instruction->declaration.index_range; -+ enum vkd3d_shader_sysval_semantic sysval = ~0u; -+ const struct shader_signature *signature; -+ bool has_control_point; -+ -+ if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -+ "DCL_INDEX_RANGE is not allowed with fully normalised input/output."); -+ return; -+ } -+ -+ if (range->dst.modifiers != VKD3DSPDM_NONE) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, -+ "Invalid modifier %#x on a DCL_INDEX_RANGE destination parameter.", range->dst.modifiers); -+ -+ if (range->dst.shift != 0) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, -+ "Invalid shift %u on a DCL_INDEX_RANGE destination parameter.", range->dst.shift); -+ -+ signature = vsir_signature_from_register_type(ctx, range->dst.reg.type, &has_control_point, &control_point_count); -+ if (!signature) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -+ "Invalid register type %#x in DCL_INDEX_RANGE instruction.", -+ range->dst.reg.type); -+ return; -+ } -+ -+ if (range->dst.reg.idx_count != 1 + !!has_control_point) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, -+ "Invalid index count %u in DCL_INDEX_RANGE instruction.", -+ range->dst.reg.idx_count); -+ return; -+ } -+ -+ if (range->dst.reg.idx[0].rel_addr || (has_control_point && range->dst.reg.idx[1].rel_addr)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid relative address in DCL_INDEX_RANGE instruction."); -+ -+ if (has_control_point) -+ { -+ if (range->dst.reg.idx[0].offset != control_point_count) -+ { -+ validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, -+ "Invalid control point index %u in DCL_INDEX_RANGE instruction, expected %u.", -+ range->dst.reg.idx[0].offset, control_point_count); -+ } -+ -+ base_register_idx = range->dst.reg.idx[1].offset; -+ } -+ else -+ { -+ base_register_idx = range->dst.reg.idx[0].offset; -+ } -+ -+ if (range->register_count < 2) -+ { -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE, -+ "Invalid register count %u in DCL_INDEX_RANGE instruction, expected at least 2.", -+ range->register_count); -+ return; -+ } -+ -+ /* Check that for each register in the range the write mask intersects at -+ * most one (and possibly zero) signature elements. Keep track of the union -+ * of all signature element masks. */ -+ for (i = 0; i < range->register_count; ++i) -+ { -+ bool found = false; -+ -+ for (j = 0; j < signature->element_count; ++j) -+ { -+ const struct signature_element *element = &signature->elements[j]; -+ -+ if (base_register_idx + i != element->register_index || !(range->dst.write_mask & element->mask)) -+ continue; -+ -+ if (found) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.", -+ range->dst.write_mask); -+ -+ found = true; -+ -+ if (first_component == UINT_MAX) -+ first_component = vsir_write_mask_get_component_idx(element->mask); -+ else if (first_component != vsir_write_mask_get_component_idx(element->mask)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Signature masks are not left-aligned within a DCL_INDEX_RANGE."); -+ -+ effective_write_mask |= element->mask; -+ } -+ } -+ -+ /* Check again to have at most one intersection for each register, but this -+ * time using the effective write mask. Also check that we have stabilized, -+ * i.e., the effective write mask now contains all the signature element -+ * masks. 
This important for being able to merge all the signature elements -+ * in a single one without conflicts (there is no hard reason why we -+ * couldn't support an effective write mask that stabilizes after more -+ * iterations, but the code would be more complicated, and we avoid that if -+ * we can). */ -+ for (i = 0; i < range->register_count; ++i) -+ { -+ bool found = false; -+ -+ for (j = 0; j < signature->element_count; ++j) -+ { -+ const struct signature_element *element = &signature->elements[j]; -+ -+ if (base_register_idx + i != element->register_index || !(effective_write_mask & element->mask)) -+ continue; -+ -+ if (element->sysval_semantic != VKD3D_SHADER_SV_NONE -+ && !vsir_sysval_semantic_is_tess_factor(element->sysval_semantic)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE.", -+ element->sysval_semantic); -+ -+ if (sysval == ~0u) -+ { -+ sysval = element->sysval_semantic; -+ /* Line density and line detail can be arrayed together. 
*/ -+ if (sysval == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN) -+ sysval = VKD3D_SHADER_SV_TESS_FACTOR_LINEDET; -+ } -+ else -+ { -+ if (sysval != element->sysval_semantic) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Inconsistent sysval semantic %#x on a signature element touched by DCL_INDEX_RANGE, " -+ "%#x was already seen.", -+ element->sysval_semantic, sysval); -+ } -+ -+ if (found) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Invalid write mask %#x on a DCL_INDEX_RANGE destination parameter.", -+ range->dst.write_mask); -+ -+ found = true; -+ -+ if (~effective_write_mask & element->mask) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Invalid write mask %#x on a signature element touched by a " -+ "DCL_INDEX_RANGE instruction with effective write mask %#x.", -+ element->mask, effective_write_mask); -+ -+ if (first_component != vsir_write_mask_get_component_idx(element->mask)) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, -+ "Signature element masks are not left-aligned within a DCL_INDEX_RANGE."); -+ } -+ } -+ -+ VKD3D_ASSERT(sysval != ~0u); -+} -+ - static void vsir_validate_dcl_input(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction) - { -@@ -8937,6 +9183,7 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ - [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, - [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, - [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, -+ [VKD3DSIH_DCL_INDEX_RANGE] = {0, 0, vsir_validate_dcl_index_range}, - [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, - [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, - [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, -@@ -9122,9 +9369,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - 
program->output_control_point_count); - } - -- vsir_validate_signature(&ctx, &program->input_signature, SIGNATURE_TYPE_INPUT); -- vsir_validate_signature(&ctx, &program->output_signature, SIGNATURE_TYPE_OUTPUT); -- vsir_validate_signature(&ctx, &program->patch_constant_signature, SIGNATURE_TYPE_PATCH_CONSTANT); -+ vsir_validate_signature(&ctx, &program->input_signature, -+ &ctx.input_signature_data, SIGNATURE_TYPE_INPUT); -+ vsir_validate_signature(&ctx, &program->output_signature, -+ &ctx.output_signature_data, SIGNATURE_TYPE_OUTPUT); -+ vsir_validate_signature(&ctx, &program->patch_constant_signature, -+ &ctx.patch_constant_signature_data, SIGNATURE_TYPE_PATCH_CONSTANT); - - for (i = 0; i < sizeof(program->io_dcls) * CHAR_BIT; ++i) - { -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index f1ca581f1d2..881e51527ff 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -354,22 +354,29 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, - static void VKD3D_PRINTF_FUNC(3, 4) msl_print_assignment( - struct msl_generator *gen, struct msl_dst *dst, const char *format, ...) 
- { -+ uint32_t modifiers = dst->vsir->modifiers; - va_list args; - - if (dst->vsir->shift) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, - "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); -- if (dst->vsir->modifiers) -+ if (modifiers & ~VKD3DSPDM_SATURATE) - msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -- "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); -+ "Internal compiler error: Unhandled destination modifier(s) %#x.", modifiers); - - msl_print_indent(gen->buffer, gen->indent); - vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); - -+ if (modifiers & VKD3DSPDM_SATURATE) -+ vkd3d_string_buffer_printf(gen->buffer, "saturate("); -+ - va_start(args, format); - vkd3d_string_buffer_vprintf(gen->buffer, format, args); - va_end(args); - -+ if (modifiers & VKD3DSPDM_SATURATE) -+ vkd3d_string_buffer_printf(gen->buffer, ")"); -+ - vkd3d_string_buffer_printf(gen->buffer, ";\n"); - } - -@@ -486,6 +493,20 @@ static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instru - msl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void msl_end_block(struct msl_generator *gen) -+{ -+ --gen->indent; -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+} -+ -+static void msl_begin_block(struct msl_generator *gen) -+{ -+ msl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "{\n"); -+ ++gen->indent; -+} -+ - static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - const char *condition; -@@ -499,16 +520,30 @@ static void msl_if(struct msl_generator *gen, const struct vkd3d_shader_instruct - - msl_src_cleanup(&src, &gen->string_buffers); - -- msl_print_indent(gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(gen->buffer, "{\n"); -- ++gen->indent; -+ msl_begin_block(gen); - } - --static void 
msl_endif(struct msl_generator *gen) -+static void msl_else(struct msl_generator *gen) - { -- --gen->indent; -+ msl_end_block(gen); - msl_print_indent(gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(gen->buffer, "}\n"); -+ vkd3d_string_buffer_printf(gen->buffer, "else\n"); -+ msl_begin_block(gen); -+} -+ -+static void msl_unary_op(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) -+{ -+ struct msl_src src; -+ struct msl_dst dst; -+ uint32_t mask; -+ -+ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ msl_src_init(&src, gen, &ins->src[0], mask); -+ -+ msl_print_assignment(gen, &dst, "%s%s", op, src.str->buffer); -+ -+ msl_src_cleanup(&src, &gen->string_buffers); -+ msl_dst_cleanup(&dst, &gen->string_buffers); - } - - static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) -@@ -583,8 +618,11 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_DP4: - msl_dot(gen, ins, VKD3DSP_WRITEMASK_ALL); - break; -+ case VKD3DSIH_ELSE: -+ msl_else(gen); -+ break; - case VKD3DSIH_ENDIF: -- msl_endif(gen); -+ msl_end_block(gen); - break; - case VKD3DSIH_IEQ: - msl_relop(gen, ins, "=="); -@@ -607,9 +645,19 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_IF: - msl_if(gen, ins); - break; -+ case VKD3DSIH_ISHL: -+ msl_binop(gen, ins, "<<"); -+ break; -+ case VKD3DSIH_ISHR: -+ case VKD3DSIH_USHR: -+ msl_binop(gen, ins, ">>"); -+ break; - case VKD3DSIH_LTO: - msl_relop(gen, ins, "<"); - break; -+ case VKD3DSIH_MAD: -+ msl_intrinsic(gen, ins, "fma"); -+ break; - case VKD3DSIH_MAX: - msl_intrinsic(gen, ins, "max"); - break; -@@ -636,6 +684,9 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d - case VKD3DSIH_MUL: - msl_binop(gen, ins, "*"); - break; -+ case VKD3DSIH_NOT: -+ msl_unary_op(gen, ins, "~"); -+ break; - case VKD3DSIH_OR: - msl_binop(gen, ins, "|"); - break; -diff 
--git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 0b14f50a312..7837b1fc8e4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -34,6 +34,32 @@ - # include "vulkan/GLSL.std.450.h" - #endif /* HAVE_SPIRV_UNIFIED1_GLSL_STD_450_H */ - -+#define VKD3D_SPIRV_VERSION_1_0 0x00010000 -+#define VKD3D_SPIRV_VERSION_1_3 0x00010300 -+#define VKD3D_SPIRV_GENERATOR_ID 18 -+#define VKD3D_SPIRV_GENERATOR_VERSION 14 -+#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) -+#ifndef VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER -+# define VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER 0 -+#endif -+ -+#define VKD3D_SPIRV_HEADER_SIZE 5 -+ -+#define VKD3D_SPIRV_VERSION_MAJOR_SHIFT 16u -+#define VKD3D_SPIRV_VERSION_MAJOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MAJOR_SHIFT) -+#define VKD3D_SPIRV_VERSION_MINOR_SHIFT 8u -+#define VKD3D_SPIRV_VERSION_MINOR_MASK (0xffu << VKD3D_SPIRV_VERSION_MINOR_SHIFT) -+ -+#define VKD3D_SPIRV_GENERATOR_ID_SHIFT 16u -+#define VKD3D_SPIRV_GENERATOR_ID_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_ID_SHIFT) -+#define VKD3D_SPIRV_GENERATOR_VERSION_SHIFT 0u -+#define VKD3D_SPIRV_GENERATOR_VERSION_MASK (0xffffu << VKD3D_SPIRV_GENERATOR_VERSION_SHIFT) -+ -+#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT 16u -+#define VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT) -+#define VKD3D_SPIRV_INSTRUCTION_OP_SHIFT 0u -+#define VKD3D_SPIRV_INSTRUCTION_OP_MASK (0xffffu << VKD3D_SPIRV_INSTRUCTION_OP_SHIFT) -+ - #ifdef HAVE_SPIRV_TOOLS - # include "spirv-tools/libspirv.h" - -@@ -82,7 +108,7 @@ static uint32_t get_binary_to_text_options(enum vkd3d_shader_compile_option_form - return out; - } - --static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, -+static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment 
environment, - enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) - { -@@ -143,20 +169,6 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co - return result; - } - --static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, -- enum vkd3d_shader_spirv_environment environment) --{ -- static const enum vkd3d_shader_compile_option_formatting_flags formatting -- = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; -- struct vkd3d_shader_code text; -- -- if (!vkd3d_spirv_binary_to_text(spirv, environment, formatting, &text)) -- { -- vkd3d_shader_trace_text(text.code, text.size); -- vkd3d_shader_free_shader_code(&text); -- } --} -- - static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment) - { -@@ -180,14 +192,13 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc - - #else - --static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, -+static enum vkd3d_result spirv_tools_binary_to_text(const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment, - enum vkd3d_shader_compile_option_formatting_flags formatting, struct vkd3d_shader_code *out) - { - return VKD3D_ERROR; - } --static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, -- enum vkd3d_shader_spirv_environment environment) {} -+ - static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struct vkd3d_shader_code *spirv, - enum vkd3d_shader_spirv_environment environment) - { -@@ -196,6 +207,312 @@ static bool vkd3d_spirv_validate(struct vkd3d_string_buffer *buffer, const struc - - #endif /* HAVE_SPIRV_TOOLS */ - -+struct spirv_colours -+{ -+ const char *reset; -+ const char *comment; -+}; -+ -+struct spirv_parser -+{ -+ struct vkd3d_string_buffer_cache string_buffers; -+ 
struct vkd3d_shader_location location; -+ struct vkd3d_shader_message_context *message_context; -+ enum vkd3d_shader_compile_option_formatting_flags formatting; -+ struct spirv_colours colours; -+ bool failed; -+ -+ const uint32_t *code; -+ size_t pos; -+ size_t size; -+ -+ struct vkd3d_string_buffer *text; -+}; -+ -+static void VKD3D_PRINTF_FUNC(3, 4) spirv_parser_error(struct spirv_parser *parser, -+ enum vkd3d_shader_error error, const char *format, ...) -+{ -+ va_list args; -+ -+ va_start(args, format); -+ vkd3d_shader_verror(parser->message_context, &parser->location, error, format, args); -+ va_end(args); -+ parser->failed = true; -+} -+ -+static uint32_t spirv_parser_read_u32(struct spirv_parser *parser) -+{ -+ if (parser->pos >= parser->size) -+ { -+ parser->failed = true; -+ return 0; -+ } -+ -+ return parser->code[parser->pos++]; -+} -+ -+static void VKD3D_PRINTF_FUNC(2, 3) spirv_parser_print_comment(struct spirv_parser *parser, const char *format, ...) -+{ -+ va_list args; -+ -+ if (!parser->text) -+ return; -+ -+ va_start(args, format); -+ vkd3d_string_buffer_printf(parser->text, "%s; ", parser->colours.comment); -+ vkd3d_string_buffer_vprintf(parser->text, format, args); -+ vkd3d_string_buffer_printf(parser->text, "%s\n", parser->colours.reset); -+ va_end(args); -+} -+ -+static void spirv_parser_print_generator(struct spirv_parser *parser, uint32_t magic) -+{ -+ unsigned int id, version; -+ const char *name; -+ -+ id = (magic & VKD3D_SPIRV_GENERATOR_ID_MASK) >> VKD3D_SPIRV_GENERATOR_ID_SHIFT; -+ version = (magic & VKD3D_SPIRV_GENERATOR_VERSION_MASK) >> VKD3D_SPIRV_GENERATOR_VERSION_SHIFT; -+ -+ switch (id) -+ { -+ case VKD3D_SPIRV_GENERATOR_ID: -+ name = "Wine VKD3D Shader Compiler"; -+ break; -+ -+ default: -+ name = NULL; -+ break; -+ } -+ -+ if (name) -+ spirv_parser_print_comment(parser, "Generator: %s; %u", name, version); -+ else -+ spirv_parser_print_comment(parser, "Generator: Unknown (%#x); %u", id, version); -+} -+ -+static enum vkd3d_result 
spirv_parser_read_header(struct spirv_parser *parser) -+{ -+ uint32_t magic, version, generator, bound, schema; -+ unsigned int major, minor; -+ -+ if (parser->pos > parser->size || parser->size - parser->pos < VKD3D_SPIRV_HEADER_SIZE) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, -+ "Unexpected end while reading the SPIR-V header."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ magic = spirv_parser_read_u32(parser); -+ version = spirv_parser_read_u32(parser); -+ generator = spirv_parser_read_u32(parser); -+ bound = spirv_parser_read_u32(parser); -+ schema = spirv_parser_read_u32(parser); -+ -+ if (magic != SpvMagicNumber) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, -+ "Invalid magic number %#08x.", magic); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (version & ~(VKD3D_SPIRV_VERSION_MAJOR_MASK | VKD3D_SPIRV_VERSION_MINOR_MASK)) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, -+ "Invalid version token %#08x.", version); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ major = (version & VKD3D_SPIRV_VERSION_MAJOR_MASK) >> VKD3D_SPIRV_VERSION_MAJOR_SHIFT; -+ minor = (version & VKD3D_SPIRV_VERSION_MINOR_MASK) >> VKD3D_SPIRV_VERSION_MINOR_SHIFT; -+ if (major != 1 || minor > 0) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "Unable to parse SPIR-V version %u.%u.", major, minor); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ if (!bound) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, -+ "Invalid zero id bound."); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ if (schema) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "Unable to handle instruction schema %#08x.", schema); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ if (parser->formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER) -+ { -+ spirv_parser_print_comment(parser, "SPIR-V"); -+ spirv_parser_print_comment(parser, 
"Version: %u.%u", major, minor); -+ spirv_parser_print_generator(parser, generator); -+ spirv_parser_print_comment(parser, "Bound: %u", bound); -+ spirv_parser_print_comment(parser, "Schema: %u", schema); -+ } -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result spirv_parser_parse_instruction(struct spirv_parser *parser) -+{ -+ struct vkd3d_string_buffer *buffer; -+ uint16_t op, count; -+ unsigned int i; -+ uint32_t word; -+ -+ word = spirv_parser_read_u32(parser); -+ count = (word & VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_MASK) >> VKD3D_SPIRV_INSTRUCTION_WORD_COUNT_SHIFT; -+ op = (word & VKD3D_SPIRV_INSTRUCTION_OP_MASK) >> VKD3D_SPIRV_INSTRUCTION_OP_SHIFT; -+ -+ if (!count) -+ { -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, -+ "Invalid word count %u.", count); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ --count; -+ buffer = vkd3d_string_buffer_get(&parser->string_buffers); -+ for (i = 0; i < count; ++i) -+ { -+ word = spirv_parser_read_u32(parser); -+ vkd3d_string_buffer_printf(buffer, " 0x%08x", word); -+ } -+ spirv_parser_print_comment(parser, "%s", op, buffer->buffer); -+ vkd3d_string_buffer_release(&parser->string_buffers, buffer); -+ -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "Unrecognised instruction %#x.", op); -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result spirv_parser_parse(struct spirv_parser *parser, struct vkd3d_shader_code *text) -+{ -+ enum vkd3d_result ret; -+ -+ if (text) -+ parser->text = vkd3d_string_buffer_get(&parser->string_buffers); -+ -+ if ((ret = spirv_parser_read_header(parser)) < 0) -+ goto fail; -+ while (parser->pos < parser->size) -+ { -+ ++parser->location.line; -+ if ((ret = spirv_parser_parse_instruction(parser)) < 0) -+ goto fail; -+ } -+ -+ if (parser->failed) -+ { -+ ret = VKD3D_ERROR_INVALID_SHADER; -+ goto fail; -+ } -+ -+ if (text) -+ vkd3d_shader_code_from_string_buffer(text, parser->text); -+ -+ return VKD3D_OK; -+ -+fail: -+ if (parser->text) -+ { -+ if 
(TRACE_ON()) -+ vkd3d_string_buffer_trace(parser->text); -+ vkd3d_string_buffer_release(&parser->string_buffers, parser->text); -+ } -+ return ret; -+} -+ -+static void spirv_parser_cleanup(struct spirv_parser *parser) -+{ -+ vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); -+} -+ -+static enum vkd3d_result spirv_parser_init(struct spirv_parser *parser, const struct vkd3d_shader_code *source, -+ const char *source_name, enum vkd3d_shader_compile_option_formatting_flags formatting, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ static const struct spirv_colours no_colours = -+ { -+ .reset = "", -+ .comment = "", -+ }; -+ static const struct spirv_colours colours = -+ { -+ .reset = "\x1b[m", -+ .comment = "\x1b[36m", -+ }; -+ -+ memset(parser, 0, sizeof(*parser)); -+ parser->location.source_name = source_name; -+ parser->message_context = message_context; -+ vkd3d_string_buffer_cache_init(&parser->string_buffers); -+ -+ if (source->size % 4) -+ { -+ vkd3d_string_buffer_cache_cleanup(&parser->string_buffers); -+ spirv_parser_error(parser, VKD3D_SHADER_ERROR_SPV_INVALID_SHADER, -+ "Shader size %zu is not a multiple of four.", source->size); -+ return VKD3D_ERROR_INVALID_SHADER; -+ } -+ -+ parser->formatting = formatting; -+ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_COLOUR) -+ parser->colours = colours; -+ else -+ parser->colours = no_colours; -+ parser->code = source->code; -+ parser->size = source->size / 4; -+ -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_code *spirv, -+ const char *source_name, enum vkd3d_shader_spirv_environment environment, -+ enum vkd3d_shader_compile_option_formatting_flags formatting, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -+{ -+ struct spirv_parser parser; -+ enum vkd3d_result ret; -+ -+ if (!VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) -+ return spirv_tools_binary_to_text(spirv, environment, 
formatting, out); -+ -+ MESSAGE("Creating a SPIR-V parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ -+ if ((ret = spirv_parser_init(&parser, spirv, source_name, formatting, message_context)) < 0) -+ return ret; -+ -+ ret = spirv_parser_parse(&parser, out); -+ -+ spirv_parser_cleanup(&parser); -+ -+ return ret; -+} -+ -+static void vkd3d_spirv_dump(const struct vkd3d_shader_code *spirv, enum vkd3d_shader_spirv_environment environment) -+{ -+ static const enum vkd3d_shader_compile_option_formatting_flags formatting -+ = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT | VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER; -+ struct vkd3d_shader_message_context message_context; -+ struct vkd3d_shader_code text; -+ -+ vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); -+ -+ if (!vkd3d_spirv_binary_to_text(spirv, NULL, environment, formatting, &text, &message_context)) -+ { -+ vkd3d_shader_trace_text(text.code, text.size); -+ vkd3d_shader_free_shader_code(&text); -+ } -+ -+ vkd3d_shader_message_context_cleanup(&message_context); -+} -+ - enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d_shader_sysval_semantic sysval, - unsigned int index) - { -@@ -247,12 +564,6 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d - } - } - --#define VKD3D_SPIRV_VERSION_1_0 0x00010000 --#define VKD3D_SPIRV_VERSION_1_3 0x00010300 --#define VKD3D_SPIRV_GENERATOR_ID 18 --#define VKD3D_SPIRV_GENERATOR_VERSION 14 --#define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) -- - struct vkd3d_spirv_stream - { - uint32_t *words; -@@ -10664,7 +10975,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - if (compile_info->target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT) - { - struct vkd3d_shader_code text; -- if (vkd3d_spirv_binary_to_text(spirv, environment, compiler->formatting, &text) != VKD3D_OK) -+ if 
(vkd3d_spirv_binary_to_text(spirv, compile_info->source_name, environment, -+ compiler->formatting, &text, compiler->message_context) != VKD3D_OK) - return VKD3D_ERROR; - vkd3d_shader_free_shader_code(spirv); - *spirv = text; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 409328b2e53..0dbcd2f6f07 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -166,21 +166,6 @@ STATIC_ASSERT(SM4_MAX_SRC_COUNT <= SPIRV_MAX_SRC_COUNT); - /* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */ - #define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2 - --#define VKD3D_SM4_REQUIRES_DOUBLES 0x00000001 --#define VKD3D_SM4_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002 --#define VKD3D_SM4_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004 --#define VKD3D_SM4_REQUIRES_64_UAVS 0x00000008 --#define VKD3D_SM4_REQUIRES_MINIMUM_PRECISION 0x00000010 --#define VKD3D_SM4_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020 --#define VKD3D_SM4_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040 --#define VKD3D_SM4_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080 --#define VKD3D_SM4_REQUIRES_TILED_RESOURCES 0x00000100 --#define VKD3D_SM4_REQUIRES_STENCIL_REF 0x00000200 --#define VKD3D_SM4_REQUIRES_INNER_COVERAGE 0x00000400 --#define VKD3D_SM4_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800 --#define VKD3D_SM4_REQUIRES_ROVS 0x00001000 --#define VKD3D_SM4_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000 -- - enum vkd3d_sm4_opcode - { - VKD3D_SM4_OP_ADD = 0x00, -@@ -1277,6 +1262,7 @@ static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_ins - { - ins->declaration.tessellator_partitioning = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -+ priv->p.program->tess_partitioning = ins->declaration.tessellator_partitioning; - } - - static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ 
-1284,6 +1270,7 @@ static void shader_sm5_read_dcl_tessellator_output_primitive(struct vkd3d_shader - { - ins->declaration.tessellator_output_primitive = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) - >> VKD3D_SM5_TESSELLATOR_SHIFT; -+ priv->p.program->tess_output_primitive = ins->declaration.tessellator_output_primitive; - } - - static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instruction *ins, uint32_t opcode, -@@ -1409,8 +1396,6 @@ struct sm4_stat - - struct tpf_compiler - { -- /* OBJECTIVE: We want to get rid of this HLSL IR specific field. */ -- struct hlsl_ctx *ctx; - struct vsir_program *program; - struct vkd3d_sm4_lookup_tables lookup; - struct sm4_stat *stat; -@@ -2917,6 +2902,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - program->input_signature = dxbc_desc.input_signature; - program->output_signature = dxbc_desc.output_signature; - program->patch_constant_signature = dxbc_desc.patch_constant_signature; -+ program->features = dxbc_desc.features; - memset(&dxbc_desc, 0, sizeof(dxbc_desc)); - - /* DXBC stores used masks inverted for output signatures, for some reason. -@@ -3207,18 +3193,17 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - return true; - } - --static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, -- uint32_t tag, struct vkd3d_bytecode_buffer *buffer) -+static void add_section(struct tpf_compiler *tpf, uint32_t tag, struct vkd3d_bytecode_buffer *buffer) - { - /* Native D3DDisassemble() expects at least the sizes of the ISGN and OSGN - * sections to be aligned. Without this, the sections themselves will be - * aligned, but their reported sizes won't. 
*/ - size_t size = bytecode_align(buffer); - -- dxbc_writer_add_section(dxbc, tag, buffer->data, size); -+ dxbc_writer_add_section(&tpf->dxbc, tag, buffer->data, size); - - if (buffer->status < 0) -- ctx->result = buffer->status; -+ tpf->result = buffer->status; - } - - static int signature_element_pointer_compare(const void *x, const void *y) -@@ -3279,7 +3264,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si - set_u32(&buffer, (2 + i * 6) * sizeof(uint32_t), string_offset); - } - -- add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); -+ add_section(tpf, tag, &buffer); - vkd3d_free(sorted_elements); - } - -@@ -3498,28 +3483,6 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - } - } - --struct extern_resource --{ -- /* var is only not NULL if this resource is a whole variable, so it may be responsible for more -- * than one component. */ -- const struct hlsl_ir_var *var; -- const struct hlsl_buffer *buffer; -- -- char *name; -- bool is_user_packed; -- -- /* The data type of a single component of the resource. -- * This might be different from the data type of the resource itself in 4.0 -- * profiles, where an array (or multi-dimensional array) is handled as a -- * single resource, unlike in 5.0. 
*/ -- struct hlsl_type *component_type; -- -- enum hlsl_regset regset; -- unsigned int id, space, index, bind_count; -- -- struct vkd3d_shader_location loc; --}; -- - static int sm4_compare_extern_resources(const void *a, const void *b) - { - const struct extern_resource *aa = (const struct extern_resource *)a; -@@ -3535,7 +3498,7 @@ static int sm4_compare_extern_resources(const void *a, const void *b) - return vkd3d_u32_compare(aa->index, bb->index); - } - --static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -+void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) - { - unsigned int i; - -@@ -3551,7 +3514,7 @@ static const char *string_skip_tag(const char *string) - return string; - } - --static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) -+struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count) - { - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; -@@ -3761,7 +3724,7 @@ static unsigned int get_component_index_from_default_initializer_index(struct hl - vkd3d_unreachable(); - } - --static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) -+void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef) - { - uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -@@ -3991,36 +3954,41 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL)); - set_u32(&buffer, creator_position, creator_offset); - -- add_section(ctx, dxbc, TAG_RDEF, &buffer); -- - sm4_free_extern_resources(extern_resources, extern_resources_count); -+ -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ rdef->code = buffer.data; -+ rdef->size = buffer.size; - } - --static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type) -+static enum vkd3d_sm4_resource_type sm4_resource_dimension(enum vkd3d_shader_resource_type resource_type) - { -- switch (type->sampler_dim) -+ switch (resource_type) - { -- case HLSL_SAMPLER_DIM_1D: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_1D: - return VKD3D_SM4_RESOURCE_TEXTURE_1D; -- case HLSL_SAMPLER_DIM_2D: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_2D: - return VKD3D_SM4_RESOURCE_TEXTURE_2D; -- case HLSL_SAMPLER_DIM_3D: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_3D: - return VKD3D_SM4_RESOURCE_TEXTURE_3D; -- case HLSL_SAMPLER_DIM_CUBE: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBE; -- case HLSL_SAMPLER_DIM_1DARRAY: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY; -- case HLSL_SAMPLER_DIM_2DARRAY: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY; -- case HLSL_SAMPLER_DIM_2DMS: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_2DMS: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMS; -- case HLSL_SAMPLER_DIM_2DMSARRAY: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY; -- case HLSL_SAMPLER_DIM_CUBEARRAY: -+ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; -- case 
HLSL_SAMPLER_DIM_BUFFER: -- case HLSL_SAMPLER_DIM_RAW_BUFFER: -- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ case VKD3D_SHADER_RESOURCE_BUFFER: - return VKD3D_SM4_RESOURCE_BUFFER; - default: - vkd3d_unreachable(); -@@ -4350,177 +4318,41 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s - sm4_update_stat_counters(tpf, instr); - } - --static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) -+static void tpf_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) - { -- size_t size = (cbuffer->used_size + 3) / 4; -+ const struct vkd3d_shader_constant_buffer *cb = &ins->declaration.cb; -+ size_t size = (cb->size + 3) / 4; - - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - -- .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, -- .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -- .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, -+ .srcs[0] = cb->src, - .src_count = 1, - }; - -- if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) - { -- instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; -- instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; -- instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ -+ instr.srcs[0].reg.idx[0].offset = cb->src.reg.idx[0].offset; -+ instr.srcs[0].reg.idx[1].offset = cb->range.first; -+ instr.srcs[0].reg.idx[2].offset = cb->range.last; - instr.srcs[0].reg.idx_count = 3; - - instr.idx[0] = size; -- instr.idx[1] = cbuffer->reg.space; -+ instr.idx[1] = cb->range.space; - instr.idx_count = 2; - } - else - { -- instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[0].offset = cb->range.first; - instr.srcs[0].reg.idx[1].offset = size; - instr.srcs[0].reg.idx_count = 2; - } - -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_dcl_samplers(const struct tpf_compiler *tpf, const struct 
extern_resource *resource) --{ -- unsigned int i; -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_DCL_SAMPLER, -- -- .dsts[0].reg.type = VKD3DSPR_SAMPLER, -- .dst_count = 1, -- }; -- -- VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); -- -- if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) -- instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; -+ if (ins->flags & VKD3DSI_INDEXED_DYNAMIC) -+ instr.extra_bits |= VKD3D_SM4_INDEX_TYPE_MASK; - -- for (i = 0; i < resource->bind_count; ++i) -- { -- if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) -- continue; -- -- if (hlsl_version_ge(tpf->ctx, 5, 1)) -- { -- VKD3D_ASSERT(!i); -- instr.dsts[0].reg.idx[0].offset = resource->id; -- instr.dsts[0].reg.idx[1].offset = resource->index; -- instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -- instr.dsts[0].reg.idx_count = 3; -- -- instr.idx[0] = resource->space; -- instr.idx_count = 1; -- } -- else -- { -- instr.dsts[0].reg.idx[0].offset = resource->index + i; -- instr.dsts[0].reg.idx_count = 1; -- } -- write_sm4_instruction(tpf, &instr); -- } --} -- --static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct extern_resource *resource, -- bool uav) --{ -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; -- struct hlsl_type *component_type; -- struct sm4_instruction instr; -- bool multisampled; -- unsigned int i; -- -- VKD3D_ASSERT(resource->regset == regset); -- -- component_type = resource->component_type; -- -- for (i = 0; i < resource->bind_count; ++i) -- { -- if (resource->var && !resource->var->objects_usage[regset][i].used) -- continue; -- -- instr = (struct sm4_instruction) -- { -- .dsts[0].reg.type = uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, -- .dsts[0].reg.idx[0].offset = resource->id + i, -- .dsts[0].reg.idx_count = 1, -- .dst_count = 1, -- -- .idx[0] = sm4_data_type(component_type) * 0x1111, -- .idx_count = 1, -- }; -- -- multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -- || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; -- -- if (!vkd3d_shader_ver_ge(version, 4, 1) && multisampled && !component_type->sample_count) -- { -- hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Multisampled texture object declaration needs sample count for profile %u.%u.", -- version->major, version->minor); -- } -- -- if (vkd3d_shader_ver_ge(version, 5, 1)) -- { -- VKD3D_ASSERT(!i); -- instr.dsts[0].reg.idx[0].offset = resource->id; -- instr.dsts[0].reg.idx[1].offset = resource->index; -- instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -- instr.dsts[0].reg.idx_count = 3; -- -- instr.idx[1] = resource->space; -- instr.idx_count = 2; -- } -- else -- { -- instr.dsts[0].reg.idx[0].offset = resource->index + i; -- instr.dsts[0].reg.idx_count = 1; -- } -- -- if (uav) -- { -- switch (component_type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -- instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -- instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; -- break; -- case HLSL_SAMPLER_DIM_RAW_BUFFER: -- instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW; -- break; -- default: -- instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; -- break; -- } -- -- if (component_type->e.resource.rasteriser_ordered) -- instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT; -- } -- else -- { -- switch (component_type->sampler_dim) -- { -- case HLSL_SAMPLER_DIM_RAW_BUFFER: -- instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; -- break; -- default: -- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; -- break; -- } -- } -- instr.extra_bits |= (sm4_resource_dimension(component_type) << 
VKD3D_SM4_RESOURCE_TYPE_SHIFT); -- -- if (multisampled) -- instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -- -- write_sm4_instruction(tpf, &instr); -- } -+ write_sm4_instruction(tpf, &instr); - } - - static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) -@@ -4597,6 +4429,100 @@ static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vs - write_sm4_instruction(tpf, &instr); - } - -+static void tpf_dcl_sampler(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct vkd3d_shader_sampler *sampler = &ins->declaration.sampler; -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_DCL_SAMPLER, -+ .extra_bits = ins->flags << VKD3D_SM4_SAMPLER_MODE_SHIFT, -+ -+ .dsts[0].reg.type = VKD3DSPR_SAMPLER, -+ .dst_count = 1, -+ }; -+ -+ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 1)) -+ { -+ instr.dsts[0].reg.idx[0].offset = sampler->src.reg.idx[0].offset; -+ instr.dsts[0].reg.idx[1].offset = sampler->range.first; -+ instr.dsts[0].reg.idx[2].offset = sampler->range.last; -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[0] = ins->declaration.sampler.range.space; -+ instr.idx_count = 1; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = sampler->range.first; -+ instr.dsts[0].reg.idx_count = 1; -+ } -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_dcl_texture(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct vkd3d_shader_structured_resource *structured_resource = &ins->declaration.structured_resource; -+ const struct vkd3d_shader_semantic *semantic = &ins->declaration.semantic; -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; -+ const struct vkd3d_sm4_opcode_info *info; -+ struct sm4_instruction instr = {0}; -+ unsigned int i, k; -+ bool uav; -+ -+ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); -+ VKD3D_ASSERT(info); -+ 
-+ uav = ins->opcode == VKD3DSIH_DCL_UAV_TYPED -+ || ins->opcode == VKD3DSIH_DCL_UAV_RAW -+ || ins->opcode == VKD3DSIH_DCL_UAV_STRUCTURED; -+ -+ instr.opcode = info->opcode; -+ -+ instr.dsts[0] = semantic->resource.reg; -+ instr.dst_count = 1; -+ -+ for (k = 0; k < 4; ++k) -+ { -+ for (i = ARRAY_SIZE(data_type_table) - 1; i < ARRAY_SIZE(data_type_table); --i) -+ { -+ if (semantic->resource_data_type[k] == data_type_table[i]) -+ { -+ instr.idx[0] |= i << (4 * k); -+ break; -+ } -+ } -+ } -+ instr.idx_count = 1; -+ -+ if (vkd3d_shader_ver_ge(version, 5, 1)) -+ { -+ instr.dsts[0].reg.idx[0].offset = semantic->resource.reg.reg.idx[0].offset; -+ instr.dsts[0].reg.idx[1].offset = semantic->resource.range.first; -+ instr.dsts[0].reg.idx[2].offset = semantic->resource.range.last; -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[1] = semantic->resource.range.space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = semantic->resource.range.first; -+ instr.dsts[0].reg.idx_count = 1; -+ } -+ -+ if (uav) -+ instr.extra_bits |= ins->flags << VKD3D_SM5_UAV_FLAGS_SHIFT; -+ -+ instr.extra_bits |= (sm4_resource_dimension(ins->resource_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); -+ instr.extra_bits |= semantic->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -+ -+ if (ins->structured) -+ instr.byte_stride = structured_resource->byte_stride; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ - static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) - { - struct sm4_instruction instr = -@@ -4746,6 +4672,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - { - switch (ins->opcode) - { -+ case VKD3DSIH_DCL_CONSTANT_BUFFER: -+ tpf_dcl_constant_buffer(tpf, ins); -+ break; -+ - case VKD3DSIH_DCL_TEMPS: - tpf_dcl_temps(tpf, ins->declaration.count); - break; -@@ -4786,6 +4716,18 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - tpf_dcl_siv_semantic(tpf, 
VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); - break; - -+ case VKD3DSIH_DCL_SAMPLER: -+ tpf_dcl_sampler(tpf, ins); -+ break; -+ -+ case VKD3DSIH_DCL: -+ case VKD3DSIH_DCL_RESOURCE_RAW: -+ case VKD3DSIH_DCL_UAV_RAW: -+ case VKD3DSIH_DCL_UAV_STRUCTURED: -+ case VKD3DSIH_DCL_UAV_TYPED: -+ tpf_dcl_texture(tpf, ins); -+ break; -+ - case VKD3DSIH_ADD: - case VKD3DSIH_AND: - case VKD3DSIH_BREAK: -@@ -4897,16 +4839,12 @@ static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_progra - tpf_handle_instruction(tpf, &program->instructions.elements[i]); - } - --static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) -+static void tpf_write_shdr(struct tpf_compiler *tpf) - { -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -+ const struct vsir_program *program = tpf->program; -+ const struct vkd3d_shader_version *version; - struct vkd3d_bytecode_buffer buffer = {0}; -- struct extern_resource *extern_resources; -- unsigned int extern_resources_count, i; -- const struct hlsl_buffer *cbuffer; -- struct hlsl_ctx *ctx = tpf->ctx; - size_t token_count_position; -- uint32_t global_flags = 0; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { -@@ -4923,92 +4861,45 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec - - tpf->buffer = &buffer; - -- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -- -+ version = &program->shader_version; - put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); - token_count_position = put_u32(&buffer, 0); - -- if (version->major == 4) -- { -- for (i = 0; i < extern_resources_count; ++i) -- { -- const struct extern_resource *resource = &extern_resources[i]; -- const struct hlsl_type *type = resource->component_type; -- -- if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -- 
{ -- global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; -- break; -- } -- } -- } -- -- if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -- global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; -- -- if (global_flags) -- write_sm4_dcl_global_flags(tpf, global_flags); -+ if (program->global_flags) -+ write_sm4_dcl_global_flags(tpf, program->global_flags); - - if (version->type == VKD3D_SHADER_TYPE_HULL) - { - tpf_write_hs_decls(tpf); - -- tpf_write_dcl_input_control_point_count(tpf, 1); /* TODO: Obtain from InputPatch */ -- tpf_write_dcl_output_control_point_count(tpf, ctx->output_control_point_count); -- tpf_write_dcl_tessellator_domain(tpf, ctx->domain); -- tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); -- tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); -+ tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); -+ tpf_write_dcl_output_control_point_count(tpf, program->output_control_point_count); -+ tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); -+ tpf_write_dcl_tessellator_partitioning(tpf, program->tess_partitioning); -+ tpf_write_dcl_tessellator_output_primitive(tpf, program->tess_output_primitive); - } - else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) - { -- tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ -- tpf_write_dcl_tessellator_domain(tpf, ctx->domain); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (cbuffer->reg.allocated) -- write_sm4_dcl_constant_buffer(tpf, cbuffer); -- } -- -- for (i = 0; i < extern_resources_count; ++i) -- { -- const struct extern_resource *resource = &extern_resources[i]; -- -- if (resource->regset == HLSL_REGSET_SAMPLERS) -- write_sm4_dcl_samplers(tpf, resource); -- else if (resource->regset == HLSL_REGSET_TEXTURES) -- write_sm4_dcl_textures(tpf, resource, false); -- else if (resource->regset == HLSL_REGSET_UAVS) -- 
write_sm4_dcl_textures(tpf, resource, true); -+ tpf_write_dcl_input_control_point_count(tpf, program->input_control_point_count); -+ tpf_write_dcl_tessellator_domain(tpf, program->tess_domain); - } - -- tpf_write_program(tpf, tpf->program); -+ tpf_write_program(tpf, program); - - set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); - -- add_section(ctx, &tpf->dxbc, TAG_SHDR, &buffer); -+ add_section(tpf, TAG_SHDR, &buffer); - tpf->buffer = NULL; -- -- sm4_free_extern_resources(extern_resources, extern_resources_count); - } - - static void tpf_write_sfi0(struct tpf_compiler *tpf) - { -- struct extern_resource *extern_resources; -- unsigned int extern_resources_count; -- struct hlsl_ctx *ctx = tpf->ctx; - uint64_t *flags; - - flags = vkd3d_calloc(1, sizeof(*flags)); - -- extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); -- for (unsigned int i = 0; i < extern_resources_count; ++i) -- { -- if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) -- *flags |= VKD3D_SM4_REQUIRES_ROVS; -- } -- sm4_free_extern_resources(extern_resources, extern_resources_count); -+ if (tpf->program->features.rovs) -+ *flags |= DXBC_SFI0_REQUIRES_ROVS; - - /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, - * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ -@@ -5023,7 +4914,6 @@ static void tpf_write_stat(struct tpf_compiler *tpf) - { - struct vkd3d_bytecode_buffer buffer = {0}; - const struct sm4_stat *stat = tpf->stat; -- struct hlsl_ctx *ctx = tpf->ctx; - - put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]); - put_u32(&buffer, stat->fields[VKD3D_STAT_TEMPS]); -@@ -5055,7 +4945,7 @@ static void tpf_write_stat(struct tpf_compiler *tpf) - put_u32(&buffer, stat->fields[VKD3D_STAT_LOD]); - put_u32(&buffer, 0); /* Sample frequency */ - -- if (hlsl_version_ge(ctx, 5, 0)) -+ if (vkd3d_shader_ver_ge(&tpf->program->shader_version, 5, 0)) - { - put_u32(&buffer, stat->fields[VKD3D_STAT_DCL_GS_INSTANCES]); - put_u32(&buffer, stat->fields[VKD3D_STAT_TESS_CONTROL_POINT_COUNT]); -@@ -5067,15 +4957,19 @@ static void tpf_write_stat(struct tpf_compiler *tpf) - put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]); - } - -- add_section(ctx, &tpf->dxbc, TAG_STAT, &buffer); -+ add_section(tpf, TAG_STAT, &buffer); -+} -+ -+static void tpf_write_section(struct tpf_compiler *tpf, uint32_t tag, const struct vkd3d_shader_code *code) -+{ -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ -+ bytecode_put_bytes(&buffer, code->code, code->size); -+ add_section(tpf, tag, &buffer); - } - --/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -- * data from the other parameters instead, so they can be removed from the -- * arguments and this function can be independent of HLSL structs. 
*/ --int tpf_compile(struct vsir_program *program, uint64_t config_flags, -- struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -- struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) - { - enum vkd3d_shader_type shader_type = program->shader_version.type; - struct tpf_compiler tpf = {0}; -@@ -5083,7 +4977,6 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, - size_t i; - int ret; - -- tpf.ctx = ctx; - tpf.program = program; - tpf.buffer = NULL; - tpf.stat = &stat; -@@ -5094,14 +4987,12 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, - tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); - if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) - tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); -- write_sm4_rdef(ctx, &tpf.dxbc); -- tpf_write_shdr(&tpf, entry_func); -+ tpf_write_section(&tpf, TAG_RDEF, rdef); -+ tpf_write_shdr(&tpf); - tpf_write_sfi0(&tpf); - tpf_write_stat(&tpf); - - ret = VKD3D_OK; -- if (ctx->result) -- ret = ctx->result; - if (tpf.result) - ret = tpf.result; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index db61eec8f28..86ec8f15fb7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -1979,7 +1979,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - static const enum vkd3d_shader_target_type dxbc_tpf_types[] = - { - VKD3D_SHADER_TARGET_SPIRV_BINARY, --#ifdef HAVE_SPIRV_TOOLS -+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) - VKD3D_SHADER_TARGET_SPIRV_TEXT, - #endif - VKD3D_SHADER_TARGET_D3D_ASM, 
-@@ -1994,7 +1994,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - static const enum vkd3d_shader_target_type hlsl_types[] = - { - VKD3D_SHADER_TARGET_SPIRV_BINARY, --#ifdef HAVE_SPIRV_TOOLS -+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) - VKD3D_SHADER_TARGET_SPIRV_TEXT, - #endif - VKD3D_SHADER_TARGET_D3D_ASM, -@@ -2006,7 +2006,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - static const enum vkd3d_shader_target_type d3dbc_types[] = - { - VKD3D_SHADER_TARGET_SPIRV_BINARY, --#ifdef HAVE_SPIRV_TOOLS -+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) - VKD3D_SHADER_TARGET_SPIRV_TEXT, - #endif - VKD3D_SHADER_TARGET_D3D_ASM, -@@ -2016,7 +2016,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( - static const enum vkd3d_shader_target_type dxbc_dxil_types[] = - { - VKD3D_SHADER_TARGET_SPIRV_BINARY, --# ifdef HAVE_SPIRV_TOOLS -+#if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) - VKD3D_SHADER_TARGET_SPIRV_TEXT, - # endif - VKD3D_SHADER_TARGET_D3D_ASM, -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index eb6d7f26a2c..88604539fae 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -62,6 +62,8 @@ - #define VKD3D_SHADER_COMPONENT_TYPE_COUNT (VKD3D_SHADER_COMPONENT_UINT64 + 1) - #define VKD3D_SHADER_MINIMUM_PRECISION_COUNT (VKD3D_SHADER_MINIMUM_PRECISION_UINT_16 + 1) - -+#define VKD3D_MAX_STREAM_COUNT 4 -+ - enum vkd3d_shader_error - { - VKD3D_SHADER_ERROR_DXBC_INVALID_SIZE = 1, -@@ -248,6 +250,7 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_VSIR_INVALID_PARAMETER = 9020, - VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC = 9021, - VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE = 9022, -+ VKD3D_SHADER_ERROR_VSIR_INVALID_RANGE = 9023, - - 
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300, - -@@ -1124,6 +1127,12 @@ bool vsir_signature_find_sysval(const struct shader_signature *signature, - enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); - void shader_signature_cleanup(struct shader_signature *signature); - -+struct vsir_features -+{ -+ /* The shader requires rasteriser-ordered views. */ -+ bool rovs; -+}; -+ - struct dxbc_shader_desc - { - const uint32_t *byte_code; -@@ -1132,6 +1141,7 @@ struct dxbc_shader_desc - struct shader_signature input_signature; - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; -+ struct vsir_features features; - }; - - struct vkd3d_shader_register_semantic -@@ -1434,8 +1444,12 @@ struct vsir_program - enum vsir_control_flow_type cf_type; - enum vsir_normalisation_level normalisation_level; - enum vkd3d_tessellator_domain tess_domain; -+ enum vkd3d_shader_tessellator_partitioning tess_partitioning; -+ enum vkd3d_shader_tessellator_output_primitive tess_output_primitive; - uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; - -+ struct vsir_features features; -+ - const char **block_names; - size_t block_name_count; - }; -@@ -1647,6 +1661,10 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); - -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -+ - int glsl_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, - const struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info, -@@ -1665,6 
+1683,9 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); - -+int tpf_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_code *rdef, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -+ - enum vkd3d_md5_variant - { - VKD3D_MD5_STANDARD, -@@ -1946,6 +1967,21 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, - - #define DXBC_MAX_SECTION_COUNT 7 - -+#define DXBC_SFI0_REQUIRES_DOUBLES 0x00000001u -+#define DXBC_SFI0_REQUIRES_EARLY_DEPTH_STENCIL 0x00000002u -+#define DXBC_SFI0_REQUIRES_UAVS_AT_EVERY_STAGE 0x00000004u -+#define DXBC_SFI0_REQUIRES_64_UAVS 0x00000008u -+#define DXBC_SFI0_REQUIRES_MINIMUM_PRECISION 0x00000010u -+#define DXBC_SFI0_REQUIRES_11_1_DOUBLE_EXTENSIONS 0x00000020u -+#define DXBC_SFI0_REQUIRES_11_1_SHADER_EXTENSIONS 0x00000040u -+#define DXBC_SFI0_REQUIRES_LEVEL_9_COMPARISON_FILTERING 0x00000080u -+#define DXBC_SFI0_REQUIRES_TILED_RESOURCES 0x00000100u -+#define DXBC_SFI0_REQUIRES_STENCIL_REF 0x00000200u -+#define DXBC_SFI0_REQUIRES_INNER_COVERAGE 0x00000400u -+#define DXBC_SFI0_REQUIRES_TYPED_UAV_LOAD_ADDITIONAL_FORMATS 0x00000800u -+#define DXBC_SFI0_REQUIRES_ROVS 0x00001000u -+#define DXBC_SFI0_REQUIRES_VIEWPORT_AND_RT_ARRAY_INDEX_FROM_ANY_SHADER_FEEDING_RASTERIZER 0x00002000u -+ - struct dxbc_writer - { - unsigned int section_count; -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index a55a97f6f2f..6c7bf167910 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -1255,6 +1255,74 @@ VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint - return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore)); - } - -+static void vkd3d_vk_descriptor_pool_array_cleanup(struct 
vkd3d_vk_descriptor_pool_array *array) -+{ -+ vkd3d_free(array->pools); -+} -+ -+static void vkd3d_vk_descriptor_pool_array_init(struct vkd3d_vk_descriptor_pool_array *array) -+{ -+ memset(array, 0, sizeof(*array)); -+} -+ -+static bool vkd3d_vk_descriptor_pool_array_push_array(struct vkd3d_vk_descriptor_pool_array *array, -+ const struct vkd3d_vk_descriptor_pool *pools, size_t count) -+{ -+ if (!vkd3d_array_reserve((void **)&array->pools, &array->capacity, array->count + count, sizeof(*array->pools))) -+ return false; -+ -+ memcpy(&array->pools[array->count], pools, count * sizeof(*pools)); -+ array->count += count; -+ -+ return true; -+} -+ -+static bool vkd3d_vk_descriptor_pool_array_push(struct vkd3d_vk_descriptor_pool_array *array, -+ unsigned int descriptor_count, VkDescriptorPool vk_pool) -+{ -+ struct vkd3d_vk_descriptor_pool pool = -+ { -+ .descriptor_count = descriptor_count, -+ .vk_pool = vk_pool, -+ }; -+ -+ return vkd3d_vk_descriptor_pool_array_push_array(array, &pool, 1); -+} -+ -+static VkDescriptorPool vkd3d_vk_descriptor_pool_array_find(struct vkd3d_vk_descriptor_pool_array *array, -+ unsigned int *descriptor_count) -+{ -+ VkDescriptorPool vk_pool; -+ size_t i; -+ -+ for (i = 0; i < array->count; ++i) -+ { -+ if (array->pools[i].descriptor_count >= *descriptor_count) -+ { -+ *descriptor_count = array->pools[i].descriptor_count; -+ vk_pool = array->pools[i].vk_pool; -+ array->pools[i] = array->pools[--array->count]; -+ -+ return vk_pool; -+ } -+ } -+ -+ return VK_NULL_HANDLE; -+} -+ -+static void vkd3d_vk_descriptor_pool_array_destroy_pools(struct vkd3d_vk_descriptor_pool_array *array, -+ const struct d3d12_device *device) -+{ -+ const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -+ size_t i; -+ -+ for (i = 0; i < array->count; ++i) -+ { -+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, array->pools[i].vk_pool, NULL)); -+ } -+ array->count = 0; -+} -+ - /* Command buffers */ - static void d3d12_command_list_mark_as_invalid(struct 
d3d12_command_list *list, - const char *message, ...) -@@ -1376,18 +1444,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat - return true; - } - --static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator, -- VkDescriptorPool pool) --{ -- if (!vkd3d_array_reserve((void **)&allocator->descriptor_pools, &allocator->descriptor_pools_size, -- allocator->descriptor_pool_count + 1, sizeof(*allocator->descriptor_pools))) -- return false; -- -- allocator->descriptor_pools[allocator->descriptor_pool_count++] = pool; -- -- return true; --} -- - static bool d3d12_command_allocator_add_view(struct d3d12_command_allocator *allocator, - struct vkd3d_view *view) - { -@@ -1426,37 +1482,71 @@ static bool d3d12_command_allocator_add_transfer_buffer(struct d3d12_command_all - } - - static VkDescriptorPool d3d12_command_allocator_allocate_descriptor_pool( -- struct d3d12_command_allocator *allocator) -+ struct d3d12_command_allocator *allocator, enum vkd3d_shader_descriptor_type descriptor_type, -+ unsigned int descriptor_count, bool unbounded) - { - struct d3d12_device *device = allocator->device; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - struct VkDescriptorPoolCreateInfo pool_desc; - VkDevice vk_device = device->vk_device; -+ VkDescriptorPoolSize vk_pool_sizes[2]; -+ unsigned int pool_size, pool_limit; - VkDescriptorPool vk_pool; - VkResult vr; - -- if (allocator->free_descriptor_pool_count > 0) -- { -- vk_pool = allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1]; -- allocator->free_descriptor_pools[allocator->free_descriptor_pool_count - 1] = VK_NULL_HANDLE; -- --allocator->free_descriptor_pool_count; -- } -- else -+ if (!(vk_pool = vkd3d_vk_descriptor_pool_array_find(&allocator->free_descriptor_pools[descriptor_type], -+ &descriptor_count))) - { -+ pool_limit = device->vk_pool_limits[descriptor_type]; -+ -+ if (descriptor_count > pool_limit) -+ { -+ if 
(!unbounded) -+ { -+ ERR("Descriptor count %u exceeds maximum pool size %u.\n", descriptor_count, pool_limit); -+ return VK_NULL_HANDLE; -+ } -+ -+ WARN("Clamping descriptor count %u to maximum pool size %u for unbounded allocation.\n", -+ descriptor_count, pool_limit); -+ descriptor_count = pool_limit; -+ } -+ -+ pool_size = allocator->vk_pool_sizes[descriptor_type]; -+ if (descriptor_count > pool_size) -+ { -+ pool_size = 1u << (vkd3d_log2i(descriptor_count - 1) + 1); -+ pool_size = min(pool_limit, pool_size); -+ } -+ descriptor_count = pool_size; -+ -+ vk_pool_sizes[0].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, true); -+ vk_pool_sizes[0].descriptorCount = descriptor_count; -+ -+ vk_pool_sizes[1].type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, false); -+ vk_pool_sizes[1].descriptorCount = descriptor_count; -+ - pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - pool_desc.pNext = NULL; - pool_desc.flags = 0; - pool_desc.maxSets = 512; -- pool_desc.poolSizeCount = device->vk_pool_count; -- pool_desc.pPoolSizes = device->vk_pool_sizes; -+ pool_desc.poolSizeCount = 1; -+ if (vk_pool_sizes[1].type != vk_pool_sizes[0].type) -+ ++pool_desc.poolSizeCount; -+ pool_desc.pPoolSizes = vk_pool_sizes; -+ - if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &vk_pool))) < 0) - { - ERR("Failed to create descriptor pool, vr %d.\n", vr); - return VK_NULL_HANDLE; - } -+ -+ if (!unbounded || descriptor_count < pool_limit) -+ allocator->vk_pool_sizes[descriptor_type] = min(pool_limit, descriptor_count * 2); - } - -- if (!(d3d12_command_allocator_add_descriptor_pool(allocator, vk_pool))) -+ if (!(vkd3d_vk_descriptor_pool_array_push(&allocator->descriptor_pools[descriptor_type], -+ descriptor_count, vk_pool))) - { - ERR("Failed to add descriptor pool.\n"); - VK_CALL(vkDestroyDescriptorPool(vk_device, vk_pool, NULL)); -@@ -1466,9 +1556,9 @@ static VkDescriptorPool 
d3d12_command_allocator_allocate_descriptor_pool( - return vk_pool; - } - --static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( -- struct d3d12_command_allocator *allocator, VkDescriptorSetLayout vk_set_layout, -- unsigned int variable_binding_size, bool unbounded) -+static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set(struct d3d12_command_allocator *allocator, -+ enum vkd3d_shader_descriptor_type descriptor_type, unsigned int descriptor_count, -+ VkDescriptorSetLayout vk_set_layout, unsigned int variable_binding_size, bool unbounded) - { - struct d3d12_device *device = allocator->device; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -@@ -1478,14 +1568,15 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( - VkDescriptorSet vk_descriptor_set; - VkResult vr; - -- if (!allocator->vk_descriptor_pool) -- allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); -- if (!allocator->vk_descriptor_pool) -+ if (!allocator->vk_descriptor_pools[descriptor_type]) -+ allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, -+ descriptor_type, descriptor_count, unbounded); -+ if (!allocator->vk_descriptor_pools[descriptor_type]) - return VK_NULL_HANDLE; - - set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - set_desc.pNext = NULL; -- set_desc.descriptorPool = allocator->vk_descriptor_pool; -+ set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; - set_desc.descriptorSetCount = 1; - set_desc.pSetLayouts = &vk_set_layout; - if (unbounded) -@@ -1499,16 +1590,17 @@ static VkDescriptorSet d3d12_command_allocator_allocate_descriptor_set( - if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) >= 0) - return vk_descriptor_set; - -- allocator->vk_descriptor_pool = VK_NULL_HANDLE; -+ allocator->vk_descriptor_pools[descriptor_type] = VK_NULL_HANDLE; - 
if (vr == VK_ERROR_FRAGMENTED_POOL || vr == VK_ERROR_OUT_OF_POOL_MEMORY_KHR) -- allocator->vk_descriptor_pool = d3d12_command_allocator_allocate_descriptor_pool(allocator); -- if (!allocator->vk_descriptor_pool) -+ allocator->vk_descriptor_pools[descriptor_type] = d3d12_command_allocator_allocate_descriptor_pool(allocator, -+ descriptor_type, descriptor_count, unbounded); -+ if (!allocator->vk_descriptor_pools[descriptor_type]) - { - ERR("Failed to allocate descriptor set, vr %d.\n", vr); - return VK_NULL_HANDLE; - } - -- set_desc.descriptorPool = allocator->vk_descriptor_pool; -+ set_desc.descriptorPool = allocator->vk_descriptor_pools[descriptor_type]; - if ((vr = VK_CALL(vkAllocateDescriptorSets(vk_device, &set_desc, &vk_descriptor_set))) < 0) - { - FIXME("Failed to allocate descriptor set from a new pool, vr %d.\n", vr); -@@ -1534,38 +1626,50 @@ static void vkd3d_buffer_destroy(struct vkd3d_buffer *buffer, struct d3d12_devic - VK_CALL(vkDestroyBuffer(device->vk_device, buffer->vk_buffer, NULL)); - } - -+static void d3d12_command_allocator_reset_descriptor_pool_array(struct d3d12_command_allocator *allocator, -+ enum vkd3d_shader_descriptor_type type) -+{ -+ struct vkd3d_vk_descriptor_pool_array *array = &allocator->descriptor_pools[type]; -+ struct d3d12_device *device = allocator->device; -+ const struct vkd3d_vk_device_procs *vk_procs; -+ const struct vkd3d_vk_descriptor_pool *pool; -+ size_t i; -+ -+ vk_procs = &device->vk_procs; -+ for (i = 0; i < array->count; ++i) -+ { -+ pool = &array->pools[i]; -+ if (pool->descriptor_count < allocator->vk_pool_sizes[type] -+ || !vkd3d_vk_descriptor_pool_array_push_array(&allocator->free_descriptor_pools[type], pool, 1)) -+ VK_CALL(vkDestroyDescriptorPool(device->vk_device, pool->vk_pool, NULL)); -+ else -+ VK_CALL(vkResetDescriptorPool(device->vk_device, pool->vk_pool, 0)); -+ } -+ array->count = 0; -+} -+ - static void d3d12_command_allocator_free_resources(struct d3d12_command_allocator *allocator, - bool 
keep_reusable_resources) - { - struct d3d12_device *device = allocator->device; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; -- unsigned int i, j; -+ unsigned int i; - -- allocator->vk_descriptor_pool = VK_NULL_HANDLE; -+ memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools)); - - if (keep_reusable_resources) - { -- if (vkd3d_array_reserve((void **)&allocator->free_descriptor_pools, -- &allocator->free_descriptor_pools_size, -- allocator->free_descriptor_pool_count + allocator->descriptor_pool_count, -- sizeof(*allocator->free_descriptor_pools))) -+ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) - { -- for (i = 0, j = allocator->free_descriptor_pool_count; i < allocator->descriptor_pool_count; ++i, ++j) -- { -- VK_CALL(vkResetDescriptorPool(device->vk_device, allocator->descriptor_pools[i], 0)); -- allocator->free_descriptor_pools[j] = allocator->descriptor_pools[i]; -- } -- allocator->free_descriptor_pool_count += allocator->descriptor_pool_count; -- allocator->descriptor_pool_count = 0; -+ d3d12_command_allocator_reset_descriptor_pool_array(allocator, i); - } - } - else - { -- for (i = 0; i < allocator->free_descriptor_pool_count; ++i) -+ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) - { -- VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->free_descriptor_pools[i], NULL)); -+ vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->free_descriptor_pools[i], device); - } -- allocator->free_descriptor_pool_count = 0; - } - - for (i = 0; i < allocator->transfer_buffer_count; ++i) -@@ -1586,11 +1690,10 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato - } - allocator->view_count = 0; - -- for (i = 0; i < allocator->descriptor_pool_count; ++i) -+ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) - { -- VK_CALL(vkDestroyDescriptorPool(device->vk_device, allocator->descriptor_pools[i], NULL)); -+ 
vkd3d_vk_descriptor_pool_array_destroy_pools(&allocator->descriptor_pools[i], device); - } -- allocator->descriptor_pool_count = 0; - - for (i = 0; i < allocator->framebuffer_count; ++i) - { -@@ -1647,6 +1750,7 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo - { - struct d3d12_command_allocator *allocator = impl_from_ID3D12CommandAllocator(iface); - unsigned int refcount = vkd3d_atomic_decrement_u32(&allocator->refcount); -+ size_t i; - - TRACE("%p decreasing refcount to %u.\n", allocator, refcount); - -@@ -1664,8 +1768,11 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo - vkd3d_free(allocator->transfer_buffers); - vkd3d_free(allocator->buffer_views); - vkd3d_free(allocator->views); -- vkd3d_free(allocator->descriptor_pools); -- vkd3d_free(allocator->free_descriptor_pools); -+ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) -+ { -+ vkd3d_vk_descriptor_pool_array_cleanup(&allocator->descriptor_pools[i]); -+ vkd3d_vk_descriptor_pool_array_cleanup(&allocator->free_descriptor_pools[i]); -+ } - vkd3d_free(allocator->framebuffers); - vkd3d_free(allocator->passes); - -@@ -1822,6 +1929,7 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo - struct vkd3d_queue *queue; - VkResult vr; - HRESULT hr; -+ size_t i; - - if (FAILED(hr = vkd3d_private_store_init(&allocator->private_store))) - return hr; -@@ -1851,11 +1959,12 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo - return hresult_from_vk_result(vr); - } - -- allocator->vk_descriptor_pool = VK_NULL_HANDLE; -+ memset(allocator->vk_descriptor_pools, 0, sizeof(allocator->vk_descriptor_pools)); - -- allocator->free_descriptor_pools = NULL; -- allocator->free_descriptor_pools_size = 0; -- allocator->free_descriptor_pool_count = 0; -+ for (i = 0; i < ARRAY_SIZE(allocator->free_descriptor_pools); ++i) -+ { -+ 
vkd3d_vk_descriptor_pool_array_init(&allocator->free_descriptor_pools[i]); -+ } - - allocator->passes = NULL; - allocator->passes_size = 0; -@@ -1865,9 +1974,11 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo - allocator->framebuffers_size = 0; - allocator->framebuffer_count = 0; - -- allocator->descriptor_pools = NULL; -- allocator->descriptor_pools_size = 0; -- allocator->descriptor_pool_count = 0; -+ for (i = 0; i < ARRAY_SIZE(allocator->descriptor_pools); ++i) -+ { -+ vkd3d_vk_descriptor_pool_array_init(&allocator->descriptor_pools[i]); -+ allocator->vk_pool_sizes[i] = min(VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE, device->vk_pool_limits[i]); -+ } - - allocator->views = NULL; - allocator->views_size = 0; -@@ -2749,7 +2860,8 @@ static void d3d12_command_list_prepare_descriptors(struct d3d12_command_list *li - } - - vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set(list->allocator, -- layout->vk_layout, variable_binding_size, unbounded_offset != UINT_MAX); -+ layout->descriptor_type, layout->descriptor_count + variable_binding_size, layout->vk_layout, -+ variable_binding_size, unbounded_offset != UINT_MAX); - bindings->descriptor_sets[bindings->descriptor_set_count++] = vk_descriptor_set; - } - -@@ -2805,15 +2917,8 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des - break; - } - -- if (range->descriptor_count == UINT_MAX) -- { -- vk_descriptor_write->dstSet = vk_descriptor_sets[set + 1]; -- vk_descriptor_write->dstBinding = 0; -- } -- else -- { -- vk_descriptor_write->dstBinding += use_array ? 1 : range->descriptor_count; -- } -+ vk_descriptor_write->dstSet = vk_descriptor_sets[range->image_set]; -+ vk_descriptor_write->dstBinding = use_array ? 
range->image_binding : range->image_binding + index; - - vk_image_info->sampler = VK_NULL_HANDLE; - vk_image_info->imageView = u.view->v.u.vk_image_view; -@@ -2934,10 +3039,11 @@ static void d3d12_command_list_update_descriptor_table(struct d3d12_command_list - } - - static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *vk_descriptor_write, -- const struct d3d12_root_parameter *root_parameter, VkDescriptorSet vk_descriptor_set, -+ const struct d3d12_root_parameter *root_parameter, const VkDescriptorSet *vk_descriptor_sets, - VkBufferView *vk_buffer_view, const VkDescriptorBufferInfo *vk_buffer_info) - { - const struct d3d12_root_descriptor *root_descriptor; -+ VkDescriptorSet vk_descriptor_set; - - switch (root_parameter->parameter_type) - { -@@ -2956,6 +3062,7 @@ static bool vk_write_descriptor_set_from_root_descriptor(VkWriteDescriptorSet *v - } - - root_descriptor = &root_parameter->u.descriptor; -+ vk_descriptor_set = vk_descriptor_sets ? vk_descriptor_sets[root_descriptor->set] : VK_NULL_HANDLE; - - vk_descriptor_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_write->pNext = NULL; -@@ -3011,7 +3118,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - } - - if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], -- root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) -+ root_parameter, bindings->descriptor_sets, vk_buffer_view, vk_buffer_info)) - continue; - - ++descriptor_count; -@@ -3039,8 +3146,8 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma - uav_counter_count = state->uav_counters.binding_count; - if (!(vk_descriptor_writes = vkd3d_calloc(uav_counter_count, sizeof(*vk_descriptor_writes)))) - return; -- if (!(vk_descriptor_set = d3d12_command_allocator_allocate_descriptor_set( -- list->allocator, state->uav_counters.vk_set_layout, 0, false))) -+ if (!(vk_descriptor_set = 
d3d12_command_allocator_allocate_descriptor_set(list->allocator, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_counter_count, state->uav_counters.vk_set_layout, 0, false))) - goto done; - - for (i = 0; i < uav_counter_count; ++i) -@@ -4612,8 +4719,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - - if (vk_info->KHR_push_descriptor) - { -- vk_write_descriptor_set_from_root_descriptor(&descriptor_write, -- root_parameter, VK_NULL_HANDLE, NULL, &buffer_info); -+ vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, NULL, &buffer_info); - VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, - root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); - } -@@ -4621,7 +4727,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - { - d3d12_command_list_prepare_descriptors(list, bind_point); - vk_write_descriptor_set_from_root_descriptor(&descriptor_write, -- root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); -+ root_parameter, bindings->descriptor_sets, NULL, &buffer_info); - VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - - VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); -@@ -4685,8 +4791,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - - if (vk_info->KHR_push_descriptor) - { -- vk_write_descriptor_set_from_root_descriptor(&descriptor_write, -- root_parameter, VK_NULL_HANDLE, &vk_buffer_view, NULL); -+ vk_write_descriptor_set_from_root_descriptor(&descriptor_write, root_parameter, NULL, &vk_buffer_view, NULL); - VK_CALL(vkCmdPushDescriptorSetKHR(list->vk_command_buffer, bindings->vk_bind_point, - root_signature->vk_pipeline_layout, 0, 1, &descriptor_write)); - } -@@ -4694,7 +4799,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - { - d3d12_command_list_prepare_descriptors(list, bind_point); - 
vk_write_descriptor_set_from_root_descriptor(&descriptor_write, -- root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); -+ root_parameter, bindings->descriptor_sets, &vk_buffer_view, NULL); - VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - - VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); -@@ -5371,8 +5476,8 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - view->info.texture.vk_view_type, view->format->type, &pipeline); - } - -- if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( -- list->allocator, pipeline.vk_set_layout, 0, false))) -+ if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set(list->allocator, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, 1, pipeline.vk_set_layout, 0, false))) - { - ERR("Failed to allocate descriptor set.\n"); - return; -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index e92373a36fa..7b491805a72 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1473,16 +1473,21 @@ static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_des - uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 
3 : 2; - } - -- limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, -- properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); -- limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, -- properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); -- limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, -- properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); -- limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, -- properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); -- limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, -- properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); -+ limits->uniform_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, -+ properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision), -+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); -+ limits->sampled_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindSampledImages, -+ properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision), -+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); -+ limits->storage_buffer_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, -+ properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision), -+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); -+ limits->storage_image_max_descriptors = min(min(properties->maxDescriptorSetUpdateAfterBindStorageImages, -+ properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision), -+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); -+ limits->sampler_max_descriptors = 
min(min(properties->maxDescriptorSetUpdateAfterBindSamplers, -+ properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision), -+ VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS); - limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); - } - -@@ -2677,39 +2682,16 @@ static void vkd3d_time_domains_init(struct d3d12_device *device) - static void device_init_descriptor_pool_sizes(struct d3d12_device *device) - { - const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; -- VkDescriptorPoolSize *pool_sizes = device->vk_pool_sizes; -+ unsigned int *pool_sizes = device->vk_pool_limits; - -- if (device->use_vk_heaps) -- { -- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -- pool_sizes[0].descriptorCount = min(limits->storage_image_max_descriptors, -- VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); -- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -- pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; -- pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; -- pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); -- device->vk_pool_count = 3; -- return; -- } -- -- VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); -- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -- pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, -+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_CBV] = min(limits->uniform_buffer_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -- pool_sizes[1].type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; -- pool_sizes[1].descriptorCount = min(limits->sampled_image_max_descriptors, -+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SRV] = min(limits->sampled_image_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -- pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- pool_sizes[2].descriptorCount = pool_sizes[1].descriptorCount; -- pool_sizes[3].type 
= VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; -- pool_sizes[3].descriptorCount = min(limits->storage_image_max_descriptors, -+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_UAV] = min(limits->storage_image_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -- pool_sizes[4].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -- pool_sizes[4].descriptorCount = pool_sizes[3].descriptorCount; -- pool_sizes[5].type = VK_DESCRIPTOR_TYPE_SAMPLER; -- pool_sizes[5].descriptorCount = min(limits->sampler_max_descriptors, -+ pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER] = min(limits->sampler_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -- device->vk_pool_count = 6; - }; - - static void vkd3d_desc_object_cache_init(struct vkd3d_desc_object_cache *cache, size_t size) -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 2b0f81d3812..32f34479ea1 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -265,25 +265,6 @@ static enum vkd3d_shader_visibility vkd3d_shader_visibility_from_d3d12(D3D12_SHA - } - } - --static VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, -- bool is_buffer) --{ -- switch (type) -- { -- case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: -- return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -- return is_buffer ? 
VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -- case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -- return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -- case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: -- return VK_DESCRIPTOR_TYPE_SAMPLER; -- default: -- FIXME("Unhandled descriptor range type type %#x.\n", type); -- return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -- } --} -- - static enum vkd3d_shader_descriptor_type vkd3d_descriptor_type_from_d3d12_range_type( - D3D12_DESCRIPTOR_RANGE_TYPE type) - { -@@ -717,6 +698,8 @@ struct vk_binding_array - VkDescriptorSetLayoutBinding *bindings; - size_t capacity, count; - -+ enum vkd3d_shader_descriptor_type descriptor_type; -+ unsigned int descriptor_set; - unsigned int table_index; - unsigned int unbounded_offset; - VkDescriptorSetLayoutCreateFlags flags; -@@ -754,14 +737,24 @@ static bool vk_binding_array_add_binding(struct vk_binding_array *array, - return true; - } - -+static void vk_binding_array_make_unbound(struct vk_binding_array *array, -+ unsigned int offset, unsigned int table_index) -+{ -+ array->unbounded_offset = offset; -+ array->table_index = table_index; -+} -+ - struct vkd3d_descriptor_set_context - { - struct vk_binding_array vk_bindings[VKD3D_MAX_DESCRIPTOR_SETS]; -+ struct vk_binding_array *current_binding_array[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; - unsigned int table_index; -- unsigned int unbounded_offset; - unsigned int descriptor_index; - unsigned int uav_counter_index; - unsigned int push_constant_index; -+ -+ struct vk_binding_array *push_descriptor_set; -+ bool push_descriptor; - }; - - static void descriptor_set_context_cleanup(struct vkd3d_descriptor_set_context *context) -@@ -786,46 +779,66 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns - return true; - } - --static struct vk_binding_array *d3d12_root_signature_current_vk_binding_array( -- struct d3d12_root_signature *root_signature, struct vkd3d_descriptor_set_context *context) -+static struct 
vk_binding_array *d3d12_root_signature_append_vk_binding_array( -+ struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, -+ VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) - { -+ struct vk_binding_array *array; -+ unsigned int set; -+ - if (root_signature->vk_set_count >= ARRAY_SIZE(context->vk_bindings)) - return NULL; - -- return &context->vk_bindings[root_signature->vk_set_count]; -+ set = root_signature->vk_set_count++; -+ array = &context->vk_bindings[set]; -+ array->descriptor_type = descriptor_type; -+ array->descriptor_set = set; -+ array->unbounded_offset = UINT_MAX; -+ array->flags = flags; -+ -+ return array; - } - --static void d3d12_root_signature_append_vk_binding_array(struct d3d12_root_signature *root_signature, -- VkDescriptorSetLayoutCreateFlags flags, struct vkd3d_descriptor_set_context *context) -+static struct vk_binding_array *d3d12_root_signature_vk_binding_array_for_type( -+ struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, -+ struct vkd3d_descriptor_set_context *context) - { -- struct vk_binding_array *array; -+ struct vk_binding_array *array, **current; - -- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) || !array->count) -- return; -+ if (context->push_descriptor) -+ { -+ if (!context->push_descriptor_set) -+ context->push_descriptor_set = d3d12_root_signature_append_vk_binding_array(root_signature, -+ descriptor_type, VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, context); - -- array->table_index = context->table_index; -- array->unbounded_offset = context->unbounded_offset; -- array->flags = flags; -+ return context->push_descriptor_set; -+ } - -- ++root_signature->vk_set_count; -+ current = context->current_binding_array; -+ if (!(array = current[descriptor_type])) -+ { -+ array = d3d12_root_signature_append_vk_binding_array(root_signature, descriptor_type, 0, 
context); -+ current[descriptor_type] = array; -+ } -+ -+ return array; - } - - static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, -- enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, -- unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, -- unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context, -- const VkSampler *immutable_sampler, unsigned int *binding_idx) -+ struct vk_binding_array *array, enum vkd3d_shader_descriptor_type descriptor_type, -+ unsigned int register_space, unsigned int register_idx, bool buffer_descriptor, -+ enum vkd3d_shader_visibility shader_visibility, unsigned int descriptor_count, -+ struct vkd3d_descriptor_set_context *context, const VkSampler *immutable_sampler) - { - struct vkd3d_shader_descriptor_offset *offset = root_signature->descriptor_offsets - ? &root_signature->descriptor_offsets[context->descriptor_index] : NULL; - struct vkd3d_shader_resource_binding *mapping; -- struct vk_binding_array *array; -+ VkDescriptorType vk_descriptor_type; - unsigned int idx; - -- if (!(array = d3d12_root_signature_current_vk_binding_array(root_signature, context)) -- || !(vk_binding_array_add_binding(&context->vk_bindings[root_signature->vk_set_count], -- vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor), descriptor_count, -- stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx))) -+ vk_descriptor_type = vk_descriptor_type_from_vkd3d_descriptor_type(descriptor_type, buffer_descriptor); -+ if (!vk_binding_array_add_binding(array, vk_descriptor_type, descriptor_count, -+ stage_flags_from_vkd3d_shader_visibility(shader_visibility), immutable_sampler, &idx)) - return E_OUTOFMEMORY; - - mapping = &root_signature->descriptor_mapping[context->descriptor_index++]; -@@ -834,7 +847,7 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct 
d3d12_root_signatur - mapping->register_index = register_idx; - mapping->shader_visibility = shader_visibility; - mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; -- mapping->binding.set = root_signature->vk_set_count; -+ mapping->binding.set = array->descriptor_set; - mapping->binding.binding = idx; - mapping->binding.count = descriptor_count; - if (offset) -@@ -843,12 +856,6 @@ static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signatur - offset->dynamic_offset_index = ~0u; - } - -- if (context->unbounded_offset != UINT_MAX) -- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); -- -- if (binding_idx) -- *binding_idx = idx; -- - return S_OK; - } - -@@ -911,7 +918,7 @@ static unsigned int vk_binding_count_from_descriptor_range(const struct d3d12_ro - } - - static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_root_signature *root_signature, -- const struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, -+ struct d3d12_root_descriptor_table_range *range, D3D12_SHADER_VISIBILITY visibility, - unsigned int vk_binding_array_count, unsigned int bindings_per_range, - struct vkd3d_descriptor_set_context *context) - { -@@ -919,34 +926,49 @@ static HRESULT d3d12_root_signature_init_descriptor_table_binding(struct d3d12_r - bool is_buffer = range->type != VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER; - enum vkd3d_shader_descriptor_type descriptor_type = range->type; - unsigned int i, register_space = range->register_space; -+ struct vk_binding_array *array; - HRESULT hr; - -- if (range->descriptor_count == UINT_MAX) -- context->unbounded_offset = range->offset; -+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) -+ return E_OUTOFMEMORY; - -+ range->set = array->descriptor_set - root_signature->main_set; -+ range->binding = array->count; - for (i = 0; i < bindings_per_range; 
++i) - { -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, - register_space, range->base_register_idx + i, is_buffer, shader_visibility, -- vk_binding_array_count, context, NULL, NULL))) -+ vk_binding_array_count, context, NULL))) - return hr; - } - -- if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ if (range->descriptor_count == UINT_MAX) - { -- context->unbounded_offset = UINT_MAX; -- return S_OK; -+ vk_binding_array_make_unbound(array, range->offset, context->table_index); -+ context->current_binding_array[descriptor_type] = NULL; - } - -+ if (descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_SRV && descriptor_type != VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) -+ return S_OK; -+ -+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) -+ return E_OUTOFMEMORY; -+ -+ range->image_set = array->descriptor_set - root_signature->main_set; -+ range->image_binding = array->count; - for (i = 0; i < bindings_per_range; ++i) - { -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, - register_space, range->base_register_idx + i, false, shader_visibility, -- vk_binding_array_count, context, NULL, NULL))) -+ vk_binding_array_count, context, NULL))) - return hr; - } - -- context->unbounded_offset = UINT_MAX; -+ if (range->descriptor_count == UINT_MAX) -+ { -+ vk_binding_array_make_unbound(array, range->offset, context->table_index); -+ context->current_binding_array[descriptor_type] = NULL; -+ } - - return S_OK; - } -@@ -1199,16 +1221,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - - if (use_vk_heaps) - { -- /* set, binding and vk_binding_count are not used. 
*/ -+ /* set, binding, image_set, image_binding, and vk_binding_count are not used. */ - range->set = 0; - range->binding = 0; -+ range->image_set = 0; -+ range->image_binding = 0; - range->vk_binding_count = 0; - d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); - continue; - } - -- range->set = root_signature->vk_set_count - root_signature->main_set; -- - if (root_signature->use_descriptor_arrays) - { - if (j && range->type != table->ranges[j - 1].type) -@@ -1229,6 +1251,8 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - - range->set = base_range->set; - range->binding = base_range->binding; -+ range->image_set = base_range->image_set; -+ range->image_binding = base_range->image_binding; - range->vk_binding_count = base_range->vk_binding_count - rel_offset; - d3d12_root_signature_map_descriptor_unbounded_binding(root_signature, range, - rel_offset, shader_visibility, context); -@@ -1251,8 +1275,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - bindings_per_range = range->descriptor_count; - } - -- range->binding = context->vk_bindings[root_signature->vk_set_count].count; -- - if (FAILED(hr = d3d12_root_signature_init_descriptor_table_binding(root_signature, range, - p->ShaderVisibility, vk_binding_array_count, bindings_per_range, context))) - return hr; -@@ -1266,7 +1288,9 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC *desc, struct vkd3d_descriptor_set_context *context) - { -- unsigned int binding, i; -+ enum vkd3d_shader_descriptor_type descriptor_type; -+ struct vk_binding_array *array; -+ unsigned int i; - HRESULT hr; - - root_signature->push_descriptor_mask = 0; -@@ -1281,14 +1305,19 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct 
d3d12_root_sign - - root_signature->push_descriptor_mask |= 1u << i; - -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, -- vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), -+ descriptor_type = vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType); -+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, descriptor_type, context))) -+ return E_OUTOFMEMORY; -+ -+ root_signature->parameters[i].parameter_type = p->ParameterType; -+ root_signature->parameters[i].u.descriptor.set = array->descriptor_set; -+ root_signature->parameters[i].u.descriptor.binding = array->count; -+ -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, descriptor_type, - p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, true, -- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL, &binding))) -+ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), 1, context, NULL))) - return hr; - -- root_signature->parameters[i].parameter_type = p->ParameterType; -- root_signature->parameters[i].u.descriptor.binding = binding; - } - - return S_OK; -@@ -1298,10 +1327,19 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - struct d3d12_device *device, const D3D12_ROOT_SIGNATURE_DESC *desc, - struct vkd3d_descriptor_set_context *context) - { -+ struct vk_binding_array *array; - unsigned int i; - HRESULT hr; - - VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); -+ -+ if (!desc->NumStaticSamplers) -+ return S_OK; -+ -+ if (!(array = d3d12_root_signature_vk_binding_array_for_type(root_signature, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, context))) -+ return E_OUTOFMEMORY; -+ - for (i = 0; i < desc->NumStaticSamplers; ++i) - { - const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; -@@ -1309,16 +1347,13 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - if 
(FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) - return hr; - -- if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, array, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, false, - vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), 1, context, -- &root_signature->static_samplers[i], NULL))) -+ &root_signature->static_samplers[i]))) - return hr; - } - -- if (device->use_vk_heaps) -- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); -- - return S_OK; - } - -@@ -1450,29 +1485,52 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, - return S_OK; - } - -+static HRESULT d3d12_descriptor_set_layout_init(struct d3d12_descriptor_set_layout *layout, -+ struct d3d12_device *device, const struct vk_binding_array *array) -+{ -+ unsigned int descriptor_count; -+ bool unbounded; -+ HRESULT hr; -+ size_t i; -+ -+ descriptor_count = array->unbounded_offset; -+ if (!(unbounded = descriptor_count != UINT_MAX)) -+ { -+ for (i = 0, descriptor_count = 0; i < array->count; ++i) -+ { -+ descriptor_count += array->bindings[i].descriptorCount; -+ } -+ } -+ -+ if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, array->flags, -+ array->count, unbounded, array->bindings, &layout->vk_layout))) -+ return hr; -+ layout->descriptor_type = array->descriptor_type; -+ layout->descriptor_count = descriptor_count; -+ layout->unbounded_offset = array->unbounded_offset; -+ layout->table_index = array->table_index; -+ -+ return S_OK; -+} -+ - static HRESULT d3d12_root_signature_create_descriptor_set_layouts(struct d3d12_root_signature *root_signature, - struct vkd3d_descriptor_set_context *context) - { - unsigned int i; - HRESULT hr; - -- d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); -- - if (!vkd3d_validate_descriptor_set_count(root_signature->device, 
root_signature->vk_set_count)) - return E_INVALIDARG; - - for (i = 0; i < root_signature->vk_set_count; ++i) - { -- struct d3d12_descriptor_set_layout *layout = &root_signature->descriptor_set_layouts[i]; -- struct vk_binding_array *array = &context->vk_bindings[i]; -+ const struct vk_binding_array *array = &context->vk_bindings[i]; - - VKD3D_ASSERT(array->count); - -- if (FAILED(hr = vkd3d_create_descriptor_set_layout(root_signature->device, array->flags, array->count, -- array->unbounded_offset != UINT_MAX, array->bindings, &layout->vk_layout))) -+ if (FAILED(hr = d3d12_descriptor_set_layout_init(&root_signature->descriptor_set_layouts[i], -+ root_signature->device, array))) - return hr; -- layout->unbounded_offset = array->unbounded_offset; -- layout->table_index = array->table_index; - } - - return S_OK; -@@ -1518,7 +1576,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - HRESULT hr; - - memset(&context, 0, sizeof(context)); -- context.unbounded_offset = UINT_MAX; - - root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; - root_signature->refcount = 1; -@@ -1580,17 +1637,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa - sizeof(*root_signature->static_samplers)))) - goto fail; - -+ context.push_descriptor = vk_info->KHR_push_descriptor; - if (FAILED(hr = d3d12_root_signature_init_root_descriptors(root_signature, desc, &context))) - goto fail; -- -- /* We use KHR_push_descriptor for root descriptor parameters. 
*/ -- if (vk_info->KHR_push_descriptor) -- { -- d3d12_root_signature_append_vk_binding_array(root_signature, -- VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, &context); -- } -- -- root_signature->main_set = root_signature->vk_set_count; -+ root_signature->main_set = !!context.push_descriptor_set; -+ context.push_descriptor = false; - - if (FAILED(hr = d3d12_root_signature_init_push_constants(root_signature, desc, - root_signature->push_constant_ranges, &root_signature->push_constant_range_count))) -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 97a99782d6a..8488d5db3fa 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -58,12 +58,17 @@ - #define VKD3D_MAX_VK_SYNC_OBJECTS 4u - #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u - #define VKD3D_MAX_DESCRIPTOR_SETS 64u -+/* Direct3D 12 binding tier 3 has a limit of "1,000,000+" CBVs, SRVs and UAVs. -+ * I am not sure what the "+" is supposed to mean: it probably hints that -+ * implementations may have an even higher limit, but that's pretty obvious, -+ * that table is for guaranteed minimum limits. */ -+#define VKD3D_MAX_DESCRIPTOR_SET_CBVS_SRVS_UAVS 1000000u - /* D3D12 binding tier 3 has a limit of 2048 samplers. */ - #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u --/* The main limitation here is the simple descriptor pool recycling scheme -- * requiring each pool to contain all descriptor types used by vkd3d. Limit -- * this number to prevent excessive pool memory use. 
*/ - #define VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE (16 * 1024u) -+#define VKD3D_INITIAL_DESCRIPTORS_POOL_SIZE 1024u -+ -+#define VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT (VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER + 1) - - extern uint64_t object_global_serial_id; - -@@ -770,6 +775,25 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE - void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, - struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc); - -+static inline VkDescriptorType vk_descriptor_type_from_vkd3d_descriptor_type(enum vkd3d_shader_descriptor_type type, -+ bool is_buffer) -+{ -+ switch (type) -+ { -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: -+ return is_buffer ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: -+ return is_buffer ? VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: -+ return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; -+ case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: -+ return VK_DESCRIPTOR_TYPE_SAMPLER; -+ default: -+ FIXME("Unhandled descriptor range type type %#x.\n", type); -+ return VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; -+ } -+} -+ - enum vkd3d_vk_descriptor_set_index - { - VKD3D_SET_INDEX_SAMPLER, -@@ -899,6 +923,8 @@ struct d3d12_root_descriptor_table_range - unsigned int vk_binding_count; - uint32_t set; - uint32_t binding; -+ uint32_t image_set; -+ uint32_t image_binding; - - enum vkd3d_shader_descriptor_type type; - uint32_t descriptor_magic; -@@ -920,6 +946,7 @@ struct d3d12_root_constant - - struct d3d12_root_descriptor - { -+ uint32_t set; - uint32_t binding; - }; - -@@ -936,7 +963,9 @@ struct d3d12_root_parameter - - struct d3d12_descriptor_set_layout - { -+ enum vkd3d_shader_descriptor_type descriptor_type; - VkDescriptorSetLayout vk_layout; -+ unsigned int descriptor_count; - unsigned int unbounded_offset; - unsigned int 
table_index; - }; -@@ -1135,6 +1164,18 @@ struct vkd3d_buffer - VkDeviceMemory vk_memory; - }; - -+struct vkd3d_vk_descriptor_pool -+{ -+ unsigned int descriptor_count; -+ VkDescriptorPool vk_pool; -+}; -+ -+struct vkd3d_vk_descriptor_pool_array -+{ -+ struct vkd3d_vk_descriptor_pool *pools; -+ size_t capacity, count; -+}; -+ - /* ID3D12CommandAllocator */ - struct d3d12_command_allocator - { -@@ -1146,11 +1187,9 @@ struct d3d12_command_allocator - - VkCommandPool vk_command_pool; - -- VkDescriptorPool vk_descriptor_pool; -+ VkDescriptorPool vk_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; - -- VkDescriptorPool *free_descriptor_pools; -- size_t free_descriptor_pools_size; -- size_t free_descriptor_pool_count; -+ struct vkd3d_vk_descriptor_pool_array free_descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; - - VkRenderPass *passes; - size_t passes_size; -@@ -1160,9 +1199,8 @@ struct d3d12_command_allocator - size_t framebuffers_size; - size_t framebuffer_count; - -- VkDescriptorPool *descriptor_pools; -- size_t descriptor_pools_size; -- size_t descriptor_pool_count; -+ struct vkd3d_vk_descriptor_pool_array descriptor_pools[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; -+ unsigned int vk_pool_sizes[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; - - struct vkd3d_view **views; - size_t views_size; -@@ -1516,8 +1554,6 @@ struct vkd3d_desc_object_cache - size_t size; - }; - --#define VKD3D_DESCRIPTOR_POOL_COUNT 6 -- - /* ID3D12Device */ - struct d3d12_device - { -@@ -1536,8 +1572,7 @@ struct d3d12_device - struct vkd3d_desc_object_cache view_desc_cache; - struct vkd3d_desc_object_cache cbuffer_desc_cache; - -- VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; -- unsigned int vk_pool_count; -+ unsigned int vk_pool_limits[VKD3D_SHADER_DESCRIPTOR_TYPE_COUNT]; - struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; - bool use_vk_heaps; - --- -2.45.2 - diff --git 
a/patches/vkd3d-latest/0006-Updated-vkd3d-to-5827197246214a3b1a362f19a0ac4de426e.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-5827197246214a3b1a362f19a0ac4de426e.patch deleted file mode 100644 index 898a3f24..00000000 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-5827197246214a3b1a362f19a0ac4de426e.patch +++ /dev/null @@ -1,1915 +0,0 @@ -From f8ff05b86acf4c5b18f389ae877ce138cb00e7d6 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Sat, 14 Dec 2024 11:00:37 +1100 -Subject: [PATCH] Updated vkd3d to 5827197246214a3b1a362f19a0ac4de426e4a3e2. - ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 32 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 79 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 63 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 110 ++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 592 ++++++++++++------ - .../libs/vkd3d-shader/hlsl_constant_ops.c | 149 ++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 50 +- - libs/vkd3d/libs/vkd3d-shader/msl.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 2 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 +- - 13 files changed, 766 insertions(+), 324 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index e7dd65d1fef..fbd5d7ffbd7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -633,7 +633,32 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, - return; - } - -+ /* Normally VSIR mandates that the register mask is a subset of the usage -+ * mask, and the usage mask is a subset of the signature mask. This is -+ * doesn't always happen with SM1-3 registers, because of the limited -+ * flexibility with expressing swizzles. -+ * -+ * For example it's easy to find shaders like this: -+ * ps_3_0 -+ * [...] -+ * dcl_texcoord0 v0 -+ * [...] 
-+ * texld r2.xyzw, v0.xyzw, s1.xyzw -+ * [...] -+ * -+ * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to -+ * compute the signature mask, but the texld instruction apparently uses all -+ * the components. Of course the last two components are ignored, but -+ * formally they seem to be used. So we end up with a signature element with -+ * mask .xy and usage mask .xyzw. -+ * -+ * In order to avoid this problem, when generating VSIR code with SM4 -+ * normalisation level we remove the unused components in the write mask. We -+ * don't do that when targetting the SM1 normalisation level (i.e., when -+ * disassembling) so as to generate the same disassembly code as native. */ - element->used_mask |= mask; -+ if (program->normalisation_level >= VSIR_NORMALISED_SM4) -+ element->used_mask &= element->mask; - } - - static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, -@@ -1265,6 +1290,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) - { - const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; -+ enum vsir_normalisation_level normalisation_level; - const uint32_t *code = compile_info->source.code; - size_t code_size = compile_info->source.size; - struct vkd3d_shader_version version; -@@ -1315,9 +1341,13 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - sm1->start = &code[1]; - sm1->end = &code[token_count]; - -+ normalisation_level = VSIR_NORMALISED_SM1; -+ if (compile_info->target_type != VKD3D_SHADER_TARGET_D3D_ASM) -+ normalisation_level = VSIR_NORMALISED_SM4; -+ - /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, -- code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) -+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, normalisation_level)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index d76f9bcc772..4493602dfb7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -10356,7 +10356,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - /* Estimate instruction count to avoid reallocation in most shaders. */ - count = max(token_count, 400) - 400; - if (!vsir_program_init(program, compile_info, &version, -- (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) -+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_NORMALISED_SM6)) - return VKD3D_ERROR_OUT_OF_MEMORY; - vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); - sm6->ptr = &sm6->start[1]; -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 113c7eee65f..ab6604bd703 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -2469,7 +2469,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; - -- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); - - vkd3d_glsl_generator_init(&generator, program, compile_info, - descriptor_info, combined_sampler_info, message_context); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index e7518404aa0..84da2fcbc9f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ 
-1854,22 +1854,45 @@ struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct - return &store->node; - } - --struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, -+struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int component_count, - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_swizzle *swizzle; - struct hlsl_type *type; - -+ VKD3D_ASSERT(val->data_type->class <= HLSL_CLASS_VECTOR); -+ - if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) - return NULL; -- VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); -- if (components == 1) -+ if (component_count > 1) -+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); -+ else - type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); -+ init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); -+ hlsl_src_from_node(&swizzle->val, val); -+ swizzle->u.vector = s; -+ -+ return &swizzle->node; -+} -+ -+struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, -+ unsigned int component_count, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_swizzle *swizzle; -+ struct hlsl_type *type; -+ -+ VKD3D_ASSERT(val->data_type->class == HLSL_CLASS_MATRIX); -+ -+ if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) -+ return NULL; -+ if (component_count > 1) -+ type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, component_count); - else -- type = hlsl_get_vector_type(ctx, val->data_type->e.numeric.type, components); -+ type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); - init_node(&swizzle->node, HLSL_IR_SWIZZLE, type, loc); - hlsl_src_from_node(&swizzle->val, val); -- swizzle->swizzle = s; -+ swizzle->u.matrix = s; -+ - return &swizzle->node; - } - -@@ -2064,8 +2087,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - 
return &jump->node; - } - --struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, -+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, -+ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, - unsigned int unroll_limit, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_loop *loop; -@@ -2076,6 +2099,10 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - hlsl_block_init(&loop->body); - hlsl_block_add_block(&loop->body, block); - -+ hlsl_block_init(&loop->iter); -+ if (iter) -+ hlsl_block_add_block(&loop->iter, iter); -+ - loop->unroll_type = unroll_type; - loop->unroll_limit = unroll_limit; - return &loop->node; -@@ -2231,14 +2258,21 @@ static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_ - - static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src) - { -+ struct hlsl_block iter, body; - struct hlsl_ir_node *dst; -- struct hlsl_block body; -+ -+ if (!clone_block(ctx, &iter, &src->iter, map)) -+ return NULL; - - if (!clone_block(ctx, &body, &src->body, map)) -+ { -+ hlsl_block_cleanup(&iter); - return NULL; -+ } - -- if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) -+ if (!(dst = hlsl_new_loop(ctx, &iter, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) - { -+ hlsl_block_cleanup(&iter); - hlsl_block_cleanup(&body); - return NULL; - } -@@ -2320,8 +2354,12 @@ static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr - static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx, - struct clone_instr_map *map, struct hlsl_ir_swizzle *src) - { -- return hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, -- map_instr(map, src->val.node), &src->node.loc); -+ if (src->val.node->data_type->class == HLSL_CLASS_MATRIX) -+ return hlsl_new_matrix_swizzle(ctx, 
src->u.matrix, src->node.data_type->dimx, -+ map_instr(map, src->val.node), &src->node.loc); -+ else -+ return hlsl_new_swizzle(ctx, src->u.vector, src->node.data_type->dimx, -+ map_instr(map, src->val.node), &src->node.loc); - } - - static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr_map *map, -@@ -3401,11 +3439,12 @@ static void dump_ir_swizzle(struct vkd3d_string_buffer *buffer, const struct hls - { - vkd3d_string_buffer_printf(buffer, "."); - for (i = 0; i < swizzle->node.data_type->dimx; ++i) -- vkd3d_string_buffer_printf(buffer, "_m%u%u", (swizzle->swizzle >> i * 8) & 0xf, (swizzle->swizzle >> (i * 8 + 4)) & 0xf); -+ vkd3d_string_buffer_printf(buffer, "_m%u%u", -+ swizzle->u.matrix.components[i].y, swizzle->u.matrix.components[i].x); - } - else - { -- vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->swizzle, swizzle->node.data_type->dimx)); -+ vkd3d_string_buffer_printf(buffer, "%s", debug_hlsl_swizzle(swizzle->u.vector, swizzle->node.data_type->dimx)); - } - } - -@@ -3713,6 +3752,7 @@ static void free_ir_load(struct hlsl_ir_load *load) - static void free_ir_loop(struct hlsl_ir_loop *loop) - { - hlsl_block_cleanup(&loop->body); -+ hlsl_block_cleanup(&loop->iter); - vkd3d_free(loop); - } - -@@ -3967,8 +4007,8 @@ void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function - - uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) - { -+ unsigned int src_component = 0; - uint32_t ret = 0; -- unsigned int i; - - /* Leave replicate swizzles alone; some instructions need them. 
*/ - if (swizzle == HLSL_SWIZZLE(X, X, X, X) -@@ -3977,13 +4017,10 @@ uint32_t hlsl_map_swizzle(uint32_t swizzle, unsigned int writemask) - || swizzle == HLSL_SWIZZLE(W, W, W, W)) - return swizzle; - -- for (i = 0; i < 4; ++i) -+ for (unsigned int dst_component = 0; dst_component < 4; ++dst_component) - { -- if (writemask & (1 << i)) -- { -- ret |= (swizzle & 3) << (i * 2); -- swizzle >>= 2; -- } -+ if (writemask & (1 << dst_component)) -+ hlsl_swizzle_set_component(&ret, dst_component, hlsl_swizzle_get_component(swizzle, src_component++)); - } - return ret; - } -@@ -4036,7 +4073,7 @@ uint32_t hlsl_combine_swizzles(uint32_t first, uint32_t second, unsigned int dim - for (i = 0; i < dim; ++i) - { - unsigned int s = hlsl_swizzle_get_component(second, i); -- ret |= hlsl_swizzle_get_component(first, s) << HLSL_SWIZZLE_SHIFT(i); -+ hlsl_swizzle_set_component(&ret, i, hlsl_swizzle_get_component(first, s)); - } - return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 5f05ceda004..3c0bbf0a3e2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -50,31 +50,17 @@ - * DEALINGS IN THE SOFTWARE. 
- */ - --#define HLSL_SWIZZLE_X (0u) --#define HLSL_SWIZZLE_Y (1u) --#define HLSL_SWIZZLE_Z (2u) --#define HLSL_SWIZZLE_W (3u) -- --#define HLSL_SWIZZLE(x, y, z, w) \ -- (((HLSL_SWIZZLE_ ## x) << 0) \ -- | ((HLSL_SWIZZLE_ ## y) << 2) \ -- | ((HLSL_SWIZZLE_ ## z) << 4) \ -- | ((HLSL_SWIZZLE_ ## w) << 6)) -- --#define HLSL_SWIZZLE_MASK (0x3u) --#define HLSL_SWIZZLE_SHIFT(idx) (2u * (idx)) -+#define HLSL_SWIZZLE VKD3D_SHADER_SWIZZLE - - static inline unsigned int hlsl_swizzle_get_component(uint32_t swizzle, unsigned int idx) - { -- return (swizzle >> HLSL_SWIZZLE_SHIFT(idx)) & HLSL_SWIZZLE_MASK; -+ return vsir_swizzle_get_component(swizzle, idx); - } - --static inline uint32_t vsir_swizzle_from_hlsl(uint32_t swizzle) -+static inline void hlsl_swizzle_set_component(uint32_t *swizzle, unsigned int idx, unsigned int component) - { -- return vkd3d_shader_create_swizzle(hlsl_swizzle_get_component(swizzle, 0), -- hlsl_swizzle_get_component(swizzle, 1), -- hlsl_swizzle_get_component(swizzle, 2), -- hlsl_swizzle_get_component(swizzle, 3)); -+ *swizzle &= ~(VKD3D_SHADER_SWIZZLE_MASK << VKD3D_SHADER_SWIZZLE_SHIFT(idx)); -+ *swizzle |= component << VKD3D_SHADER_SWIZZLE_SHIFT(idx); - } - - enum hlsl_type_class -@@ -659,21 +645,30 @@ struct hlsl_ir_if - struct hlsl_block else_block; - }; - --enum hlsl_ir_loop_unroll_type -+enum hlsl_loop_unroll_type -+{ -+ HLSL_LOOP_UNROLL, -+ HLSL_LOOP_FORCE_UNROLL, -+ HLSL_LOOP_FORCE_LOOP -+}; -+ -+enum hlsl_loop_type - { -- HLSL_IR_LOOP_UNROLL, -- HLSL_IR_LOOP_FORCE_UNROLL, -- HLSL_IR_LOOP_FORCE_LOOP -+ HLSL_LOOP_FOR, -+ HLSL_LOOP_WHILE, -+ HLSL_LOOP_DO_WHILE - }; - - struct hlsl_ir_loop - { - struct hlsl_ir_node node; -+ struct hlsl_block iter; - /* loop condition is stored in the body (as "if (!condition) break;") */ - struct hlsl_block body; -+ enum hlsl_loop_type type; - unsigned int next_index; /* liveness index of the end of the loop */ - unsigned int unroll_limit; -- enum hlsl_ir_loop_unroll_type unroll_type; -+ enum 
hlsl_loop_unroll_type unroll_type; - }; - - struct hlsl_ir_switch_case -@@ -793,7 +788,17 @@ struct hlsl_ir_swizzle - { - struct hlsl_ir_node node; - struct hlsl_src val; -- uint32_t swizzle; -+ union -+ { -+ uint32_t vector; -+ struct hlsl_matrix_swizzle -+ { -+ struct -+ { -+ uint8_t x, y; -+ } components[4]; -+ } matrix; -+ } u; - }; - - struct hlsl_ir_index -@@ -1550,8 +1555,11 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty - struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); --struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter, -+ struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type, -+ unsigned int unroll_limit, const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_matrix_swizzle(struct hlsl_ctx *ctx, struct hlsl_matrix_swizzle s, -+ unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -@@ -1642,6 +1650,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block); - bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); -+bool 
hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context); - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), - struct hlsl_block *block, void *context); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index afa41f4b1c2..ce9f7fd6a77 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -555,13 +555,6 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co - return true; - } - --enum loop_type --{ -- LOOP_FOR, -- LOOP_WHILE, -- LOOP_DO_WHILE --}; -- - static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const struct parse_attribute_list *attrs) - { - unsigned int i, j; -@@ -577,8 +570,8 @@ static void check_attribute_list_for_duplicates(struct hlsl_ctx *ctx, const stru - } - } - --static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, -- struct hlsl_block *cond, struct hlsl_block *iter) -+static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ enum hlsl_loop_type type, struct hlsl_block *cond) - { - struct hlsl_ir_node *instr, *next; - -@@ -588,8 +581,8 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block - { - struct hlsl_ir_if *iff = hlsl_ir_if(instr); - -- resolve_loop_continue(ctx, &iff->then_block, type, cond, iter); -- resolve_loop_continue(ctx, &iff->else_block, type, cond, iter); -+ resolve_loop_continue(ctx, &iff->then_block, type, cond); -+ resolve_loop_continue(ctx, &iff->else_block, type, cond); - } - else if (instr->type == HLSL_IR_JUMP) - { -@@ -599,7 +592,7 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block - if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) - continue; - -- if (type == 
LOOP_DO_WHILE) -+ if (type == HLSL_LOOP_DO_WHILE) - { - if (!hlsl_clone_block(ctx, &cond_block, cond)) - return; -@@ -610,13 +603,6 @@ static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block - } - list_move_before(&instr->entry, &cond_block.instrs); - } -- else if (type == LOOP_FOR) -- { -- if (!hlsl_clone_block(ctx, &cond_block, iter)) -- return; -- list_move_before(&instr->entry, &cond_block.instrs); -- } -- jump->type = HLSL_IR_JUMP_CONTINUE; - } - } - } -@@ -740,11 +726,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str - return res.number.u; - } - --static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, -+static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum hlsl_loop_type type, - const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, - struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) - { -- enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; -+ enum hlsl_loop_unroll_type unroll_type = HLSL_LOOP_UNROLL; - unsigned int i, unroll_limit = 0; - struct hlsl_ir_node *loop; - -@@ -775,11 +761,11 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - hlsl_block_cleanup(&expr); - } - -- unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; -+ unroll_type = HLSL_LOOP_FORCE_UNROLL; - } - else if (!strcmp(attr->name, "loop")) - { -- unroll_type = HLSL_IR_LOOP_FORCE_LOOP; -+ unroll_type = HLSL_LOOP_FORCE_LOOP; - } - else if (!strcmp(attr->name, "fastopt") - || !strcmp(attr->name, "allow_uav_condition")) -@@ -792,7 +778,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - } - } - -- resolve_loop_continue(ctx, body, type, cond, iter); -+ resolve_loop_continue(ctx, body, type, cond); - - if (!init && !(init = make_empty_block(ctx))) - goto oom; -@@ -800,15 +786,12 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum 
loop_type type, - if (!append_conditional_break(ctx, cond)) - goto oom; - -- if (iter) -- hlsl_block_add_block(body, iter); -- -- if (type == LOOP_DO_WHILE) -+ if (type == HLSL_LOOP_DO_WHILE) - list_move_tail(&body->instrs, &cond->instrs); - else - list_move_head(&body->instrs, &cond->instrs); - -- if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) -+ if (!(loop = hlsl_new_loop(ctx, iter, body, unroll_type, unroll_limit, loc))) - goto oom; - hlsl_block_add_instr(init, loop); - -@@ -862,6 +845,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - if (value->data_type->class == HLSL_CLASS_MATRIX) - { - /* Matrix swizzle */ -+ struct hlsl_matrix_swizzle s; - bool m_swizzle; - unsigned int inc, x, y; - -@@ -892,10 +876,11 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - - if (x >= value->data_type->dimx || y >= value->data_type->dimy) - return NULL; -- swiz |= (y << 4 | x) << component * 8; -+ s.components[component].x = x; -+ s.components[component].y = y; - component++; - } -- return hlsl_new_swizzle(ctx, swiz, component, value, loc); -+ return hlsl_new_matrix_swizzle(ctx, s, component, value, loc); - } - - /* Vector swizzle */ -@@ -924,8 +909,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod - - if (s >= value->data_type->dimx) - return NULL; -- swiz |= s << component * 2; -- component++; -+ hlsl_swizzle_set_component(&swiz, component++, s); - } - if (valid) - return hlsl_new_swizzle(ctx, swiz, component, value, loc); -@@ -2102,8 +2086,8 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned - { - if (*writemask & (1 << i)) - { -- unsigned int s = (*swizzle >> (i * 2)) & 3; -- new_swizzle |= s << (bit++ * 2); -+ unsigned int s = hlsl_swizzle_get_component(*swizzle, i); -+ hlsl_swizzle_set_component(&new_swizzle, bit++, s); - if (new_writemask & (1 << s)) - return false; - new_writemask |= 1 << s; -@@ -2117,9 
+2101,9 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned - { - for (j = 0; j < width; ++j) - { -- unsigned int s = (new_swizzle >> (j * 2)) & 3; -+ unsigned int s = hlsl_swizzle_get_component(new_swizzle, j); - if (s == i) -- inverted |= j << (bit++ * 2); -+ hlsl_swizzle_set_component(&inverted, bit++, j); - } - } - -@@ -2129,22 +2113,22 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned - return true; - } - --static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) -+static bool invert_swizzle_matrix(const struct hlsl_matrix_swizzle *swizzle, -+ uint32_t *ret_inverted, unsigned int *writemask, unsigned int *ret_width) - { -- /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. -- * components are indexed by their sources. i.e. the first component comes from the first -- * component of the rhs. */ -- unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; -+ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0; -+ struct hlsl_matrix_swizzle new_swizzle = {0}; - - /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ - for (i = 0; i < 4; ++i) - { - if (*writemask & (1 << i)) - { -- unsigned int s = (*swizzle >> (i * 8)) & 0xff; -- unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int x = swizzle->components[i].x; -+ unsigned int y = swizzle->components[i].y; - unsigned int idx = x + y * 4; -- new_swizzle |= s << (bit++ * 8); -+ -+ new_swizzle.components[bit++] = swizzle->components[i]; - if (new_writemask & (1 << idx)) - return false; - new_writemask |= 1 << idx; -@@ -2152,22 +2136,22 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un - } - width = bit; - -- /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the -- * incoming vector. 
*/ -+ /* Then we invert the swizzle. The resulting swizzle uses a uint32_t -+ * vector format, because it's for the incoming vector. */ - bit = 0; - for (i = 0; i < 16; ++i) - { - for (j = 0; j < width; ++j) - { -- unsigned int s = (new_swizzle >> (j * 8)) & 0xff; -- unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int x = new_swizzle.components[j].x; -+ unsigned int y = new_swizzle.components[j].y; - unsigned int idx = x + y * 4; - if (idx == i) -- inverted |= j << (bit++ * 2); -+ hlsl_swizzle_set_component(&inverted, bit++, j); - } - } - -- *swizzle = inverted; -+ *ret_inverted = inverted; - *writemask = new_writemask; - *ret_width = width; - return true; -@@ -2221,28 +2205,34 @@ static bool add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struc - { - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); - struct hlsl_ir_node *new_swizzle; -- uint32_t s = swizzle->swizzle; -+ uint32_t s; - - VKD3D_ASSERT(!matrix_writemask); - - if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) - { -+ struct hlsl_matrix_swizzle ms = swizzle->u.matrix; -+ - if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) - { - hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); - return false; - } -- if (!invert_swizzle_matrix(&s, &writemask, &width)) -+ if (!invert_swizzle_matrix(&ms, &s, &writemask, &width)) - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); - return false; - } - matrix_writemask = true; - } -- else if (!invert_swizzle(&s, &writemask, &width)) -+ else - { -- hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); -- return false; -+ s = swizzle->u.vector; -+ if (!invert_swizzle(&s, &writemask, &width)) -+ { -+ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); -+ return false; -+ } - } - - if (!(new_swizzle = hlsl_new_swizzle(ctx, s, width, rhs, 
&swizzle->node.loc))) -@@ -8831,25 +8821,25 @@ if_body: - loop_statement: - attribute_list_optional loop_scope_start KW_WHILE '(' expr ')' statement - { -- $$ = create_loop(ctx, LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); -+ $$ = create_loop(ctx, HLSL_LOOP_WHILE, &$1, NULL, $5, NULL, $7, &@3); - hlsl_pop_scope(ctx); - cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_DO statement KW_WHILE '(' expr ')' ';' - { -- $$ = create_loop(ctx, LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); -+ $$ = create_loop(ctx, HLSL_LOOP_DO_WHILE, &$1, NULL, $7, NULL, $4, &@3); - hlsl_pop_scope(ctx); - cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_FOR '(' expr_statement expr_statement expr_optional ')' statement - { -- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); -+ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); - hlsl_pop_scope(ctx); - cleanup_parse_attribute_list(&$1); - } - | attribute_list_optional loop_scope_start KW_FOR '(' declaration expr_statement expr_optional ')' statement - { -- $$ = create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@3); -+ $$ = create_loop(ctx, HLSL_LOOP_FOR, &$1, $5, $6, $7, $9, &@3); - hlsl_pop_scope(ctx); - cleanup_parse_attribute_list(&$1); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index e6924aa70ef..c3c8e5d55b3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -1076,7 +1076,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - struct hlsl_deref var_deref; - struct hlsl_type *matrix_type; - struct hlsl_ir_var *var; -- unsigned int x, y, k, i; -+ unsigned int k, i; - - if (instr->type != HLSL_IR_SWIZZLE) - return false; -@@ -1094,9 +1094,7 @@ static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins - struct hlsl_block store_block; - struct hlsl_ir_node *load; - -- y = 
(swizzle->swizzle >> (8 * i + 4)) & 0xf; -- x = (swizzle->swizzle >> 8 * i) & 0xf; -- k = y * matrix_type->dimx + x; -+ k = swizzle->u.matrix.components[i].y * matrix_type->dimx + swizzle->u.matrix.components[i].x; - - if (!(load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc))) - return false; -@@ -1359,8 +1357,10 @@ struct copy_propagation_var_def - - struct copy_propagation_state - { -- struct rb_tree var_defs; -- struct copy_propagation_state *parent; -+ struct rb_tree *scope_var_defs; -+ size_t scope_count, scopes_capacity; -+ struct hlsl_ir_node *stop; -+ bool stopped; - }; - - static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry) -@@ -1382,6 +1382,38 @@ static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *conte - vkd3d_free(var_def); - } - -+static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx) -+{ -+ if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity, -+ state->scope_count + 1, sizeof(*state->scope_var_defs)))) -+ return false; -+ -+ rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare); -+ -+ return state->scope_count; -+} -+ -+static size_t copy_propagation_pop_scope(struct copy_propagation_state *state) -+{ -+ rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL); -+ -+ return state->scope_count; -+} -+ -+static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx) -+{ -+ memset(state, 0, sizeof(*state)); -+ -+ return copy_propagation_push_scope(state, ctx); -+} -+ -+static void copy_propagation_state_destroy(struct copy_propagation_state *state) -+{ -+ while (copy_propagation_pop_scope(state)); -+ -+ vkd3d_free(state->scope_var_defs); -+} -+ - static struct copy_propagation_value *copy_propagation_get_value_at_time( - struct copy_propagation_component_trace *trace, unsigned 
int time) - { -@@ -1399,9 +1431,10 @@ static struct copy_propagation_value *copy_propagation_get_value_at_time( - static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state, - const struct hlsl_ir_var *var, unsigned int component, unsigned int time) - { -- for (; state; state = state->parent) -+ for (size_t i = state->scope_count - 1; i < state->scope_count; i--) - { -- struct rb_entry *entry = rb_get(&state->var_defs, var); -+ struct rb_tree *tree = &state->scope_var_defs[i]; -+ struct rb_entry *entry = rb_get(tree, var); - if (entry) - { - struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry); -@@ -1427,7 +1460,8 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co - static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx, - struct copy_propagation_state *state, struct hlsl_ir_var *var) - { -- struct rb_entry *entry = rb_get(&state->var_defs, var); -+ struct rb_tree *tree = &state->scope_var_defs[state->scope_count - 1]; -+ struct rb_entry *entry = rb_get(tree, var); - struct copy_propagation_var_def *var_def; - unsigned int component_count = hlsl_type_component_count(var->data_type); - int res; -@@ -1440,7 +1474,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h - - var_def->var = var; - -- res = rb_put(&state->var_defs, var, &var_def->entry); -+ res = rb_put(tree, var, &var_def->entry); - VKD3D_ASSERT(!res); - - return var_def; -@@ -1597,7 +1631,7 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx, - var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count)); - return false; - } -- ret_swizzle |= value->component << HLSL_SWIZZLE_SHIFT(i); -+ hlsl_swizzle_set_component(&ret_swizzle, i, value->component); - } - - TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n", -@@ -1721,10 +1755,10 @@ static 
bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx, - return false; - load = hlsl_ir_load(swizzle->val.node); - -- if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->swizzle, &swizzle->node)) -+ if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node)) - return true; - -- if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->swizzle, &swizzle->node)) -+ if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node)) - return true; - - return false; -@@ -1820,18 +1854,6 @@ static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_s - } - } - --static void copy_propagation_state_init(struct hlsl_ctx *ctx, struct copy_propagation_state *state, -- struct copy_propagation_state *parent) --{ -- rb_init(&state->var_defs, copy_propagation_var_def_compare); -- state->parent = parent; --} -- --static void copy_propagation_state_destroy(struct copy_propagation_state *state) --{ -- rb_destroy(&state->var_defs, copy_propagation_var_def_destroy, NULL); --} -- - static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state, - struct hlsl_block *block, unsigned int time) - { -@@ -1900,16 +1922,19 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b - static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff, - struct copy_propagation_state *state) - { -- struct copy_propagation_state inner_state; - bool progress = false; - -- copy_propagation_state_init(ctx, &inner_state, state); -- progress |= copy_propagation_transform_block(ctx, &iff->then_block, &inner_state); -- copy_propagation_state_destroy(&inner_state); -+ copy_propagation_push_scope(state, ctx); -+ progress |= copy_propagation_transform_block(ctx, &iff->then_block, state); -+ if (state->stopped) -+ return progress; -+ copy_propagation_pop_scope(state); - -- 
copy_propagation_state_init(ctx, &inner_state, state); -- progress |= copy_propagation_transform_block(ctx, &iff->else_block, &inner_state); -- copy_propagation_state_destroy(&inner_state); -+ copy_propagation_push_scope(state, ctx); -+ progress |= copy_propagation_transform_block(ctx, &iff->else_block, state); -+ if (state->stopped) -+ return progress; -+ copy_propagation_pop_scope(state); - - /* Ideally we'd invalidate the outer state looking at what was - * touched in the two inner states, but this doesn't work for -@@ -1924,14 +1949,16 @@ static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if - static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop, - struct copy_propagation_state *state) - { -- struct copy_propagation_state inner_state; - bool progress = false; - - copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index); -+ copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index); - -- copy_propagation_state_init(ctx, &inner_state, state); -- progress |= copy_propagation_transform_block(ctx, &loop->body, &inner_state); -- copy_propagation_state_destroy(&inner_state); -+ copy_propagation_push_scope(state, ctx); -+ progress |= copy_propagation_transform_block(ctx, &loop->body, state); -+ if (state->stopped) -+ return progress; -+ copy_propagation_pop_scope(state); - - return progress; - } -@@ -1939,15 +1966,16 @@ static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_l - static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s, - struct copy_propagation_state *state) - { -- struct copy_propagation_state inner_state; - struct hlsl_ir_switch_case *c; - bool progress = false; - - LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) - { -- copy_propagation_state_init(ctx, &inner_state, state); -- progress |= copy_propagation_transform_block(ctx, &c->body, &inner_state); -- 
copy_propagation_state_destroy(&inner_state); -+ copy_propagation_push_scope(state, ctx); -+ progress |= copy_propagation_transform_block(ctx, &c->body, state); -+ if (state->stopped) -+ return progress; -+ copy_propagation_pop_scope(state); - } - - LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -@@ -1966,6 +1994,12 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b - - LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) - { -+ if (instr == state->stop) -+ { -+ state->stopped = true; -+ return progress; -+ } -+ - switch (instr->type) - { - case HLSL_IR_LOAD: -@@ -2003,6 +2037,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b - default: - break; - } -+ -+ if (state->stopped) -+ return progress; - } - - return progress; -@@ -2015,7 +2052,7 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc - - index_instructions(block, 2); - -- copy_propagation_state_init(ctx, &state, NULL); -+ copy_propagation_state_init(&state, ctx); - - progress = copy_propagation_transform_block(ctx, block, &state); - -@@ -2403,8 +2440,8 @@ static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - struct hlsl_ir_node *new_swizzle; - uint32_t combined_swizzle; - -- combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->swizzle, -- swizzle->swizzle, instr->data_type->dimx); -+ combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector, -+ swizzle->u.vector, instr->data_type->dimx); - next_instr = hlsl_ir_swizzle(next_instr)->val.node; - - if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle, instr->data_type->dimx, next_instr, &instr->loc))) -@@ -2431,7 +2468,7 @@ static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *i - return false; - - for (i = 0; i < instr->data_type->dimx; ++i) -- if (hlsl_swizzle_get_component(swizzle->swizzle, i) != i) -+ if 
(hlsl_swizzle_get_component(swizzle->u.vector, i) != i) - return false; - - hlsl_replace_node(instr, swizzle->val.node); -@@ -6569,6 +6606,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, body, NULL); - progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); - progress |= hlsl_copy_propagation_execute(ctx, body); - progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); -@@ -6786,7 +6824,6 @@ static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t d - - swizzle = hlsl_swizzle_from_writemask(src_writemask); - swizzle = hlsl_map_swizzle(swizzle, dst_writemask); -- swizzle = vsir_swizzle_from_hlsl(swizzle); - return swizzle; - } - -@@ -7855,9 +7892,8 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, - dst_param->write_mask = instr->reg.writemask; - - swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); -- swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->swizzle, instr->data_type->dimx); -+ swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->dimx); - swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask); -- swizzle = vsir_swizzle_from_hlsl(swizzle); - - src_param = &ins->src[0]; - VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); -@@ -8015,7 +8051,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - 
return; -@@ -9886,7 +9922,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -@@ -9951,39 +9987,129 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - generate_vsir_scan_global_flags(ctx, program, func); - } - --static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -- struct hlsl_block **found_block) -+static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -+ bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *node; -+ struct hlsl_ir_node *const_node, *store; - -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ if (!(const_node = hlsl_new_bool_constant(ctx, val, loc))) -+ return false; -+ hlsl_block_add_instr(block, const_node); -+ -+ if (!(store = hlsl_new_simple_store(ctx, var, const_node))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ return true; -+} -+ -+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued); -+ -+static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, -+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) -+{ -+ struct hlsl_ir_jump *jump; -+ struct hlsl_ir_var *var; -+ struct hlsl_block draft; -+ struct hlsl_ir_if *iff; -+ -+ if (node->type == HLSL_IR_IF) - { -- if (node == stop_point) -- return NULL; -+ iff = hlsl_ir_if(node); -+ if (loop_unrolling_remove_jumps_recurse(ctx, 
&iff->then_block, loop_broken, loop_continued)) -+ return true; -+ if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued)) -+ return true; -+ return false; -+ } - -- if (node->type == HLSL_IR_IF) -- { -- struct hlsl_ir_if *iff = hlsl_ir_if(node); -- struct hlsl_ir_jump *jump = NULL; -+ if (node->type == HLSL_IR_JUMP) -+ { -+ jump = hlsl_ir_jump(node); -+ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK) -+ return false; - -- if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -- return jump; -- if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -- return jump; -- } -- else if (node->type == HLSL_IR_JUMP) -- { -- struct hlsl_ir_jump *jump = hlsl_ir_jump(node); -+ hlsl_block_init(&draft); - -- if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) -- { -- *found_block = block; -- return jump; -- } -- } -+ if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE) -+ var = loop_continued; -+ else -+ var = loop_broken; -+ -+ if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc)) -+ return false; -+ -+ list_move_before(&jump->node.entry, &draft.instrs); -+ list_remove(&jump->node.entry); -+ hlsl_free_instr(&jump->node); -+ -+ return true; - } - -- return NULL; -+ return false; -+} -+ -+static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx, -+ struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *cond, *iff; -+ struct hlsl_block then_block; -+ struct hlsl_ir_load *load; -+ -+ hlsl_block_init(&then_block); -+ -+ if (!(load = hlsl_new_var_load(ctx, var, loc))) -+ return NULL; -+ hlsl_block_add_instr(dst, &load->node); -+ -+ if (!(cond = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, &load->node, loc))) -+ return NULL; -+ hlsl_block_add_instr(dst, cond); -+ -+ if (!(iff = hlsl_new_if(ctx, cond, &then_block, 
NULL, loc))) -+ return NULL; -+ hlsl_block_add_instr(dst, iff); -+ -+ return hlsl_ir_if(iff); -+} -+ -+static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) -+{ -+ struct hlsl_ir_node *node, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ struct hlsl_ir_if *broken_check, *continued_check; -+ struct hlsl_block draft; -+ -+ if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued)) -+ continue; -+ -+ if (&next->entry == &block->instrs) -+ return true; -+ -+ hlsl_block_init(&draft); -+ -+ broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc); -+ continued_check = loop_unrolling_generate_var_check(ctx, -+ &broken_check->then_block, loop_continued, &next->loc); -+ -+ list_move_before(&next->entry, &draft.instrs); -+ -+ list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs)); -+ -+ return true; -+ } -+ -+ return false; -+} -+ -+static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued) -+{ -+ while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued)); - } - - static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -@@ -9993,7 +10119,7 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru - return loop->unroll_limit; - - /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ -- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) - return 1024; - - /* SM4 limits implicit unrolling to 254 iterations. 
*/ -@@ -10004,167 +10130,279 @@ static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, stru - return 1024; - } - --static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -- struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) -+static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct copy_propagation_state *state, unsigned int *index) - { -- unsigned int max_iterations, i; -+ size_t scopes_depth = state->scope_count - 1; -+ unsigned int current_index; -+ bool progress; -+ -+ do -+ { -+ state->stopped = false; -+ for (size_t i = state->scope_count; scopes_depth < i; --i) -+ copy_propagation_pop_scope(state); -+ copy_propagation_push_scope(state, ctx); -+ -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL); -+ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL); -+ -+ current_index = index_instructions(block, *index); -+ progress |= copy_propagation_transform_block(ctx, block, state); -+ -+ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL); -+ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL); -+ } while (progress); -+ -+ *index = current_index; -+} -+ -+static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var) -+{ -+ struct copy_propagation_value *v; -+ -+ if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX)) -+ || v->node->type != HLSL_IR_CONSTANT) -+ return false; -+ -+ return hlsl_ir_constant(v->node)->value.u[0].u; -+} -+ -+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop) -+{ -+ struct hlsl_block draft, tmp_dst, loop_body; -+ struct hlsl_ir_var *broken, *continued; -+ unsigned int max_iterations, 
i, index; -+ struct copy_propagation_state state; -+ struct hlsl_ir_if *target_if; -+ -+ if (!(broken = hlsl_new_synthetic_var(ctx, "broken", -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) -+ goto fail; -+ -+ if (!(continued = hlsl_new_synthetic_var(ctx, "continued", -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc))) -+ goto fail; -+ -+ hlsl_block_init(&draft); -+ hlsl_block_init(&tmp_dst); - - max_iterations = loop_unrolling_get_max_iterations(ctx, loop); -+ copy_propagation_state_init(&state, ctx); -+ index = 2; -+ state.stop = &loop->node; -+ loop_unrolling_simplify(ctx, block, &state, &index); -+ state.stopped = false; -+ index = loop->node.index; -+ -+ if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc)) -+ goto fail; -+ hlsl_block_add_block(&draft, &tmp_dst); -+ -+ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) -+ goto fail; -+ hlsl_block_add_block(&draft, &tmp_dst); -+ -+ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) -+ goto fail; -+ state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry); -+ hlsl_block_add_block(&draft, &tmp_dst); -+ -+ copy_propagation_push_scope(&state, ctx); -+ loop_unrolling_simplify(ctx, &draft, &state, &index); -+ -+ /* As an optimization, we only remove jumps from the loop's body once. 
*/ -+ if (!hlsl_clone_block(ctx, &loop_body, &loop->body)) -+ goto fail; -+ loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued); - - for (i = 0; i < max_iterations; ++i) - { -- struct hlsl_block tmp_dst, *jump_block; -- struct hlsl_ir_jump *jump = NULL; -+ copy_propagation_push_scope(&state, ctx); - -- if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) -- return false; -- list_move_before(&loop->node.entry, &tmp_dst.instrs); -- hlsl_block_cleanup(&tmp_dst); -+ if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc)) -+ goto fail; -+ hlsl_block_add_block(&target_if->then_block, &tmp_dst); - -- hlsl_run_const_passes(ctx, block); -+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body)) -+ goto fail; -+ hlsl_block_add_block(&target_if->then_block, &tmp_dst); - -- if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) -- { -- enum hlsl_ir_jump_type type = jump->type; -+ loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index); - -- if (jump_block != loop_parent) -- { -- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -- hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -- "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); -- return false; -- } -+ if (loop_unrolling_check_val(&state, broken)) -+ break; - -- list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); -- hlsl_block_cleanup(&tmp_dst); -+ if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc))) -+ goto fail; -+ hlsl_block_add_block(&draft, &tmp_dst); - -- if (type == HLSL_IR_JUMP_BREAK) -- break; -- } -- } -+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter)) -+ goto fail; -+ hlsl_block_add_block(&target_if->then_block, &tmp_dst); -+ } - - /* Native will not emit an error if max_iterations has been reached with an - * explicit limit. 
It also will not insert a loop if there are iterations left - * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ - if (!loop->unroll_limit && i == max_iterations) - { -- if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL) - hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, - "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); -- return false; -+ goto fail; - } - -+ hlsl_block_cleanup(&loop_body); -+ copy_propagation_state_destroy(&state); -+ -+ list_move_before(&loop->node.entry, &draft.instrs); -+ hlsl_block_cleanup(&draft); - list_remove(&loop->node.entry); - hlsl_free_instr(&loop->node); - - return true; -+ -+fail: -+ hlsl_block_cleanup(&loop_body); -+ copy_propagation_state_destroy(&state); -+ hlsl_block_cleanup(&draft); -+ -+ return false; - } - --/* -- * loop_unrolling_find_unrollable_loop() is not the normal way to do things; -- * normal passes simply iterate over the whole block and apply a transformation -- * to every relevant instruction. However, loop unrolling can fail, and we want -- * to leave the loop in its previous state in that case. That isn't a problem by -- * itself, except that loop unrolling needs copy-prop in order to work properly, -- * and copy-prop state at the time of the loop depends on the rest of the program -- * up to that point. This means we need to clone the whole program, and at that -- * point we have to search it again anyway to find the clone of the loop we were -- * going to unroll. -- * -- * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop -- * up until the loop instruction, clone just that loop, then use copyprop again -- * with the saved state after unrolling. However, copyprop currently isn't built -- * for that yet [notably, it still relies on indices]. 
Note also this still doesn't -- * really let us use transform_ir() anyway [since we don't have a good way to say -- * "copyprop from the beginning of the program up to the instruction we're -- * currently processing" from the callback]; we'd have to use a dedicated -- * recursive function instead. */ --static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -- struct hlsl_block **containing_block) -+static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) - { -- struct hlsl_ir_node *instr; -+ struct hlsl_block *program = context; -+ struct hlsl_ir_loop *loop; - -- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ if (node->type != HLSL_IR_LOOP) -+ return true; -+ -+ loop = hlsl_ir_loop(node); -+ -+ if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL) -+ return true; -+ -+ if (!loop_unrolling_unroll_loop(ctx, program, loop)) -+ loop->unroll_type = HLSL_LOOP_FORCE_LOOP; -+ -+ return true; -+} -+ -+/* We could handle this at parse time. However, loop unrolling often needs to -+ * know the value of variables modified in the "iter" block. It is possible to -+ * detect that all exit paths of a loop body modify such variables in the same -+ * way, but difficult, and d3dcompiler does not attempt to do so. 
-+ * In fact, d3dcompiler is capable of unrolling the following loop: -+ * for (int i = 0; i < 10; ++i) -+ * { -+ * if (some_uniform > 4) -+ * continue; -+ * } -+ * but cannot unroll the same loop with "++i" moved to each exit path: -+ * for (int i = 0; i < 10;) -+ * { -+ * if (some_uniform > 4) -+ * { -+ * ++i; -+ * continue; -+ * } -+ * ++i; -+ * } -+ */ -+static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context) -+{ -+ struct hlsl_ir_loop *loop; -+ -+ if (node->type != HLSL_IR_LOOP) -+ return true; -+ -+ loop = hlsl_ir_loop(node); -+ -+ hlsl_block_add_block(&loop->body, &loop->iter); -+ return true; -+} -+ -+static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop) -+{ -+ struct hlsl_ir_node *node; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { -- switch (instr->type) -+ switch (node->type) - { - case HLSL_IR_LOOP: - { -- struct hlsl_ir_loop *nested_loop; -- struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -- -- if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) -- return nested_loop; -- -- if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -- { -- *containing_block = block; -- return loop; -- } -+ struct hlsl_ir_loop *loop = hlsl_ir_loop(node); - -+ resolve_continues(ctx, &loop->body, loop); - break; - } - case HLSL_IR_IF: - { -- struct hlsl_ir_loop *loop; -- struct hlsl_ir_if *iff = hlsl_ir_if(instr); -- -- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) -- return loop; -- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) -- return loop; -- -+ struct hlsl_ir_if *iff = hlsl_ir_if(node); -+ resolve_continues(ctx, &iff->then_block, last_loop); -+ resolve_continues(ctx, &iff->else_block, last_loop); - break; - } - case HLSL_IR_SWITCH: - { -- struct hlsl_ir_switch *s = 
hlsl_ir_switch(instr); -+ struct hlsl_ir_switch *s = hlsl_ir_switch(node); - struct hlsl_ir_switch_case *c; -- struct hlsl_ir_loop *loop; - - LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) - { -- if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) -- return loop; -+ resolve_continues(ctx, &c->body, last_loop); - } - - break; - } -+ case HLSL_IR_JUMP: -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); -+ -+ if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE) -+ break; -+ -+ if (last_loop->type == HLSL_LOOP_FOR) -+ { -+ struct hlsl_block draft; -+ -+ if (!hlsl_clone_block(ctx, &draft, &last_loop->iter)) -+ return; -+ -+ list_move_before(&node->entry, &draft.instrs); -+ hlsl_block_cleanup(&draft); -+ } -+ -+ jump->type = HLSL_IR_JUMP_CONTINUE; -+ break; -+ } - default: - break; - } - } -- -- return NULL; - } - --static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) -+static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block) - { -- while (true) -- { -- struct hlsl_block clone, *containing_block; -- struct hlsl_ir_loop *loop, *cloned_loop; -- -- if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) -- return; -- -- if (!hlsl_clone_block(ctx, &clone, block)) -- return; -- -- cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); -- VKD3D_ASSERT(cloned_loop); -+ bool progress; - -- if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) -- { -- hlsl_block_cleanup(&clone); -- loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; -- continue; -- } -+ /* These are required by copy propagation, which in turn is required for -+ * unrolling. 
*/ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL); -+ progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL); -+ } while (progress); -+ hlsl_transform_ir(ctx, split_matrix_copies, block, NULL); - -- hlsl_block_cleanup(block); -- hlsl_block_init(block); -- hlsl_block_add_block(block, &clone); -- } -+ hlsl_transform_ir(ctx, unroll_loops, block, block); -+ resolve_continues(ctx, block, NULL); -+ hlsl_transform_ir(ctx, resolve_loops, block, NULL); - } - - static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -@@ -10413,7 +10651,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, - hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); - } - -- transform_unroll_loops(ctx, body); -+ loop_unrolling_execute(ctx, body); - hlsl_run_const_passes(ctx, body); - - remove_unreachable_code(ctx, body); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 716adb15f08..cd7cd2fe6a3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -220,7 +220,9 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - break; - - case HLSL_TYPE_BOOL: -- /* Casts to bool should have already been lowered. */ -+ dst->u[k].u = u ? 
~0u : 0u; -+ break; -+ - default: - vkd3d_unreachable(); - } -@@ -1544,6 +1546,149 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - return false; - } - -+static bool is_op_associative(enum hlsl_ir_expr_op op, enum hlsl_base_type type) -+{ -+ switch (op) -+ { -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_MUL: -+ return type == HLSL_TYPE_INT || type == HLSL_TYPE_UINT; -+ -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_BIT_XOR: -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: -+ case HLSL_OP2_MAX: -+ case HLSL_OP2_MIN: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static bool is_op_commutative(enum hlsl_ir_expr_op op) -+{ -+ switch (op) -+ { -+ case HLSL_OP2_ADD: -+ case HLSL_OP2_BIT_AND: -+ case HLSL_OP2_BIT_OR: -+ case HLSL_OP2_BIT_XOR: -+ case HLSL_OP2_DOT: -+ case HLSL_OP2_LOGIC_AND: -+ case HLSL_OP2_LOGIC_OR: -+ case HLSL_OP2_MAX: -+ case HLSL_OP2_MIN: -+ case HLSL_OP2_MUL: -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+bool hlsl_normalize_binary_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *arg1 , *arg2; -+ struct hlsl_ir_expr *expr; -+ enum hlsl_base_type type; -+ enum hlsl_ir_expr_op op; -+ bool progress = false; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ -+ if (instr->data_type->class > HLSL_CLASS_VECTOR) -+ return false; -+ -+ arg1 = expr->operands[0].node; -+ arg2 = expr->operands[1].node; -+ type = instr->data_type->e.numeric.type; -+ op = expr->op; -+ -+ if (!arg1 || !arg2) -+ return false; -+ -+ if (is_op_commutative(op) && arg1->type == HLSL_IR_CONSTANT && arg2->type != HLSL_IR_CONSTANT) -+ { -+ /* a OP x -> x OP a */ -+ struct hlsl_ir_node *tmp = arg1; -+ -+ arg1 = arg2; -+ arg2 = tmp; -+ progress = true; -+ } -+ -+ if (is_op_associative(op, type)) -+ { -+ struct hlsl_ir_expr *e1 = arg1->type == HLSL_IR_EXPR ? 
hlsl_ir_expr(arg1) : NULL; -+ struct hlsl_ir_expr *e2 = arg2->type == HLSL_IR_EXPR ? hlsl_ir_expr(arg2) : NULL; -+ -+ if (e1 && e1->op == op && e1->operands[0].node->type != HLSL_IR_CONSTANT -+ && e1->operands[1].node->type == HLSL_IR_CONSTANT) -+ { -+ if (arg2->type == HLSL_IR_CONSTANT) -+ { -+ /* (x OP a) OP b -> x OP (a OP b) */ -+ struct hlsl_ir_node *ab; -+ -+ if (!(ab = hlsl_new_binary_expr(ctx, op, e1->operands[1].node, arg2))) -+ return false; -+ list_add_before(&instr->entry, &ab->entry); -+ -+ arg1 = e1->operands[0].node; -+ arg2 = ab; -+ progress = true; -+ } -+ else if (is_op_commutative(op)) -+ { -+ /* (x OP a) OP y -> (x OP y) OP a */ -+ struct hlsl_ir_node *xy; -+ -+ if (!(xy = hlsl_new_binary_expr(ctx, op, e1->operands[0].node, arg2))) -+ return false; -+ list_add_before(&instr->entry, &xy->entry); -+ -+ arg1 = xy; -+ arg2 = e1->operands[1].node; -+ progress = true; -+ } -+ } -+ -+ if (!progress && arg1->type != HLSL_IR_CONSTANT && e2 && e2->op == op -+ && e2->operands[0].node->type != HLSL_IR_CONSTANT && e2->operands[1].node->type == HLSL_IR_CONSTANT) -+ { -+ /* x OP (y OP a) -> (x OP y) OP a */ -+ struct hlsl_ir_node *xy; -+ -+ if (!(xy = hlsl_new_binary_expr(ctx, op, arg1, e2->operands[0].node))) -+ return false; -+ list_add_before(&instr->entry, &xy->entry); -+ -+ arg1 = xy; -+ arg2 = e2->operands[1].node; -+ progress = true; -+ } -+ -+ } -+ -+ if (progress) -+ { -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; -+ struct hlsl_ir_node *res; -+ -+ if (!(res = hlsl_new_expr(ctx, op, operands, instr->data_type, &instr->loc))) -+ return false; -+ list_add_before(&instr->entry, &res->entry); -+ hlsl_replace_node(instr, res); -+ } -+ -+ return progress; -+} -+ - bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct hlsl_constant_value value; -@@ -1560,7 +1705,7 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst - src = 
hlsl_ir_constant(swizzle->val.node); - - for (i = 0; i < swizzle->node.data_type->dimx; ++i) -- value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->swizzle, i)]; -+ value.u[i] = src->value.u[hlsl_swizzle_get_component(swizzle->u.vector, i)]; - - if (!(dst = hlsl_new_constant(ctx, instr->data_type, &value, &instr->loc))) - return false; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b3442ec92ae..e6d90e14212 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -1582,7 +1582,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - enum vkd3d_result ret; - unsigned int i, j; - -- VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM4); - - if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) - { -@@ -2340,7 +2340,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - - program->instructions = normaliser.instructions; - program->use_vocp = normaliser.use_vocp; -- program->normalisation_level = VSIR_FULLY_NORMALISED_IO; -+ program->normalisation_level = VSIR_NORMALISED_SM6; - return VKD3D_OK; - } - -@@ -7210,6 +7210,7 @@ static const struct shader_signature *vsir_signature_from_register_type(struct v - enum vkd3d_shader_register_type register_type, bool *has_control_point, unsigned int *control_point_count) - { - *has_control_point = false; -+ *control_point_count = 0; - - switch (register_type) - { -@@ -7233,7 +7234,7 @@ static const struct shader_signature *vsir_signature_from_register_type(struct v - { - case VKD3D_SHADER_TYPE_HULL: - if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE -- || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -+ || ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) - { - *has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - 
*control_point_count = ctx->program->output_control_point_count; -@@ -7275,7 +7276,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru - signature = vsir_signature_from_register_type(ctx, reg->type, &has_control_point, &control_point_count); - VKD3D_ASSERT(signature); - -- if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) -+ if (ctx->program->normalisation_level < VSIR_NORMALISED_SM6) - { - /* Indices are [register] or [control point, register]. Both are - * allowed to have a relative address. */ -@@ -8097,29 +8098,20 @@ static void vsir_validate_signature_element(struct validation_context *ctx, - "element %u of %s signature: Non-contiguous mask %#x.", - idx, signature_type_name, element->mask); - -- /* Here we'd likely want to validate that the usage mask is a subset of the -- * signature mask. Unfortunately the D3DBC parser sometimes violates this. -- * For example I've seen a shader like this: -- * ps_3_0 -- * [...] -- * dcl_texcoord0 v0 -- * [...] -- * texld r2.xyzw, v0.xyzw, s1.xyzw -- * [...] -- * -- * The dcl_textcoord0 instruction secretly has a .xy mask, which is used to -- * compute the signature mask, but the texld instruction apparently uses all -- * the components. Of course the last two components are ignored, but -- * formally they seem to be used. So we end up with a signature element with -- * mask .xy and usage mask .xyzw. -- * -- * The correct fix would probably be to make the D3DBC parser aware of which -- * components are really used for each instruction, but that would take some -- * time. 
*/ -- if (element->used_mask & ~0xf) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -- "element %u of %s signature: Invalid usage mask %#x.", -- idx, signature_type_name, element->used_mask); -+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM4) -+ { -+ if ((element->used_mask & element->mask) != element->used_mask) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid usage mask %#x with mask %#x.", -+ idx, signature_type_name, element->used_mask, element->mask); -+ } -+ else -+ { -+ if (element->used_mask & ~0xf) -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "element %u of %s signature: Invalid usage mask %#x.", -+ idx, signature_type_name, element->used_mask); -+ } - - switch (element->sysval_semantic) - { -@@ -8373,7 +8365,7 @@ static void vsir_validate_signature(struct validation_context *ctx, const struct - } - - /* After I/O normalisation tessellation factors are merged in a single array. 
*/ -- if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) - { - expected_outer_count = min(1, expected_outer_count); - expected_inner_count = min(1, expected_inner_count); -@@ -8567,7 +8559,7 @@ static void vsir_validate_dcl_index_range(struct validation_context *ctx, - const struct shader_signature *signature; - bool has_control_point; - -- if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) -+ if (ctx->program->normalisation_level >= VSIR_NORMALISED_SM6) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "DCL_INDEX_RANGE is not allowed with fully normalised input/output."); -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 881e51527ff..bb85e62e94c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -1314,7 +1314,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; - -- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); - - if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 7837b1fc8e4..a7b935543a0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -10826,7 +10826,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - compile_info, compiler->message_context)) < 0) - return result; - -- VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); - - max_element_count = max(program->output_signature.element_count, 
program->patch_constant_signature.element_count); - if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 0dbcd2f6f07..872603052ac 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2793,7 +2793,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro - - /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, -- &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) -+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4)) - return false; - vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); - sm4->ptr = sm4->start; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 88604539fae..3bfb0a7c3cd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1411,9 +1411,10 @@ enum vsir_control_flow_type - - enum vsir_normalisation_level - { -- VSIR_NOT_NORMALISED, -+ VSIR_NORMALISED_SM1, -+ VSIR_NORMALISED_SM4, - VSIR_NORMALISED_HULL_CONTROL_POINT_IO, -- VSIR_FULLY_NORMALISED_IO, -+ VSIR_NORMALISED_SM6, - }; - - struct vsir_program --- -2.45.2 -