From 342b3b81624129d6a4c4d0f8150e7a32b2a237b0 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Dec 2024 09:15:55 +1100 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-9619582d1b6a54720e17a148a72b446fda2.patch | 4878 +++++++++++++++++ ...-39cbef9e018ee760ffd175fdd6729e47052.patch | 1105 ++++ 2 files changed, 5983 insertions(+) create mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch create mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch new file mode 100644 index 00000000..c2da59c6 --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-9619582d1b6a54720e17a148a72b446fda2.patch @@ -0,0 +1,4878 @@ +From caa0d7f0cafae8a67fb79dd7426839126285a092 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Fri, 29 Nov 2024 07:14:57 +1100 +Subject: [PATCH] Updated vkd3d to 9619582d1b6a54720e17a148a72b446fda2fd41f. 
+ +--- + libs/vkd3d/include/private/vkd3d_common.h | 2 +- + libs/vkd3d/include/vkd3d_shader.h | 25 + + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 + + libs/vkd3d/libs/vkd3d-shader/dxil.c | 29 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 151 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 69 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 38 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 26 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1164 ++++++++++++++--- + libs/vkd3d/libs/vkd3d-shader/ir.c | 519 +++++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 237 +++- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + + libs/vkd3d/libs/vkd3d-shader/spirv.c | 80 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 1090 +-------------- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 20 + + .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + + 18 files changed, 2038 insertions(+), 1420 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index ec1dd70c9b2..fd62730f948 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -275,7 +275,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index f95caa2f825..cb561d7f079 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -112,6 +112,11 @@ enum vkd3d_shader_structure_type + * \since 1.13 + */ + VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, ++ /** ++ * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. 
++ * \since 1.15 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -2040,6 +2045,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info + unsigned int combined_sampler_count; + }; + ++/** ++ * A chained structure describing the tessellation information in a hull shader. ++ * ++ * This structure extends vkd3d_shader_compile_info. ++ * ++ * \since 1.15 ++ */ ++struct vkd3d_shader_scan_hull_shader_tessellation_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** The tessellation output primitive. */ ++ enum vkd3d_shader_tessellator_output_primitive output_primitive; ++ /** The tessellation partitioning mode. */ ++ enum vkd3d_shader_tessellator_partitioning partitioning; ++}; ++ + /** + * Data type of a shader varying, returned as part of struct + * vkd3d_shader_signature_element. 
+diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index f60ef7db769..c2c6ad67804 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -20,6 +20,7 @@ + #define WIDL_C_INLINE_WRAPPERS + #endif + #define COBJMACROS ++ + #define CONST_VTABLE + #include "vkd3d.h" + #include "vkd3d_blob.h" +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index 9e2eacbcfa6..bda9bc72f56 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1565,6 +1565,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -1671,6 +1672,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 7099bcc9ce2..71f3c7f17b0 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -3824,7 +3824,7 @@ static void src_params_init_from_operands(struct vkd3d_shader_src_param *src_par + } + + static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( +- enum vkd3d_shader_sysval_semantic sysval_semantic) ++ enum vkd3d_shader_sysval_semantic sysval_semantic, bool is_input) + { + switch (sysval_semantic) + { +@@ -3834,7 +3834,7 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( + case VKD3D_SHADER_SV_SAMPLE_INDEX: + return VKD3DSPR_NULL; + case VKD3D_SHADER_SV_COVERAGE: +- return VKD3DSPR_COVERAGE; ++ return is_input ? 
VKD3DSPR_COVERAGE : VKD3DSPR_SAMPLEMASK; + case VKD3D_SHADER_SV_DEPTH: + return VKD3DSPR_DEPTHOUT; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: +@@ -3884,7 +3884,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + param = ¶ms[i]; + + if (e->register_index == UINT_MAX +- && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) ++ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input)) != VKD3DSPR_NULL) + { + dst_param_io_init(param, e, io_reg_type); + continue; +@@ -9348,7 +9348,7 @@ static void signature_element_read_additional_element_values(struct signature_el + } + + static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const struct sm6_metadata_value *m, +- struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain) ++ struct shader_signature *s, enum vkd3d_tessellator_domain tessellator_domain, bool is_input) + { + unsigned int i, j, column_count, operand_count, index; + const struct sm6_metadata_node *node, *element_node; +@@ -9466,7 +9466,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const + + if ((is_register = e->register_index == UINT_MAX)) + { +- if (register_type_from_dxil_semantic_kind(e->sysval_semantic) == VKD3DSPR_INVALID) ++ if (register_type_from_dxil_semantic_kind(e->sysval_semantic, is_input) == VKD3DSPR_INVALID) + { + WARN("Unhandled I/O register semantic kind %u.\n", j); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, +@@ -9578,17 +9578,17 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + } + + if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], +- &program->input_signature, tessellator_domain)) < 0) ++ &program->input_signature, tessellator_domain, true)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, 
m->u.node->operands[1], +- &program->output_signature, tessellator_domain)) < 0) ++ &program->output_signature, tessellator_domain, false)) < 0) + { + return ret; + } + if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[2], +- &program->patch_constant_signature, tessellator_domain)) < 0) ++ &program->patch_constant_signature, tessellator_domain, false)) < 0) + { + return ret; + } +@@ -9717,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); + ins->declaration.tessellator_domain = tessellator_domain; ++ sm6->p.program->tess_domain = tessellator_domain; + } + +-static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, +- const char *type) ++static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, ++ unsigned int count, bool allow_zero, const char *type) + { +- if (!count || count > 32) ++ if ((!count && !allow_zero) || count > 32) + { + WARN("%s control point count %u invalid.\n", type, count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +@@ -9951,7 +9952,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa + } + + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); +- sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); ++ sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); + sm6->p.program->input_control_point_count = operands[1]; + + return operands[0]; +@@ -10010,9 +10011,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa + } + } + +- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); ++ sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); + program->input_control_point_count = operands[1]; +- 
sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); ++ sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); + program->output_control_point_count = operands[2]; + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index e22177e1e30..064e15c4b60 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -36,6 +36,16 @@ struct fx_4_binary_type + uint32_t typeinfo; + }; + ++struct fx_5_shader ++{ ++ uint32_t offset; ++ uint32_t sodecl[4]; ++ uint32_t sodecl_count; ++ uint32_t rast_stream; ++ uint32_t iface_bindings_count; ++ uint32_t iface_bindings; ++}; ++ + struct string_entry + { + struct rb_entry entry; +@@ -550,6 +560,8 @@ enum fx_4_type_constants + FX_4_ASSIGNMENT_VARIABLE = 0x2, + FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, + FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, ++ FX_4_ASSIGNMENT_INLINE_SHADER = 0x7, ++ FX_5_ASSIGNMENT_INLINE_SHADER = 0x8, + }; + + static const uint32_t fx_4_numeric_base_types[] = +@@ -762,6 +774,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + vkd3d_unreachable(); + + case HLSL_CLASS_VOID: +@@ -1298,6 +1311,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + /* This cannot appear as an extern variable. 
*/ + break; + } +@@ -1834,6 +1848,7 @@ enum state_property_component_type + FX_BLEND, + FX_VERTEXSHADER, + FX_PIXELSHADER, ++ FX_GEOMETRYSHADER, + FX_COMPONENT_TYPE_COUNT, + }; + +@@ -2065,6 +2080,7 @@ fx_4_states[] = + + { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, + { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, ++ { "GeometryShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_GEOMETRYSHADER, 1, 1, 8 }, + { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, + { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, + { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, +@@ -2951,7 +2967,7 @@ static void VKD3D_PRINTF_FUNC(3, 4) fx_parser_error(struct fx_parser *parser, en + + static int fx_2_parse(struct fx_parser *parser) + { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented.\n"); ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, "Parsing fx_2_0 binaries is not implemented."); + + return -1; + } +@@ -3120,7 +3136,7 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) + else + { + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, +- "Only numeric and string types are supported in annotations.\n"); ++ "Only numeric and string types are supported in annotations."); + } + + if (type.element_count) +@@ -3210,27 +3226,13 @@ static void fx_parse_buffers(struct fx_parser *parser) + } + } + +-static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) ++static void fx_4_parse_shader_blob(struct fx_parser *parser, unsigned int object_type, const struct fx_5_shader *shader) + { + struct vkd3d_shader_compile_info info = { 0 }; + struct vkd3d_shader_code output; +- uint32_t data_size, offset; + const void *data = NULL; + const char *p, *q, *end; +- struct fx_5_shader +- { +- uint32_t offset; +- uint32_t sodecl[4]; 
+- uint32_t sodecl_count; +- uint32_t rast_stream; +- uint32_t iface_bindings_count; +- uint32_t iface_bindings; +- } shader5; +- struct fx_4_gs_so +- { +- uint32_t offset; +- uint32_t sodecl; +- } gs_so; ++ uint32_t data_size; + int ret; + + static const struct vkd3d_shader_compile_option options[] = +@@ -3238,35 +3240,9 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + }; + +- switch (object_type) +- { +- case FX_4_OBJECT_TYPE_PIXEL_SHADER: +- case FX_4_OBJECT_TYPE_VERTEX_SHADER: +- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: +- offset = fx_parser_read_u32(parser); +- break; +- +- case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: +- fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); +- offset = gs_so.offset; +- break; +- +- case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: +- case FX_5_OBJECT_TYPE_COMPUTE_SHADER: +- case FX_5_OBJECT_TYPE_HULL_SHADER: +- case FX_5_OBJECT_TYPE_DOMAIN_SHADER: +- fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); +- offset = shader5.offset; +- break; +- +- default: +- parser->failed = true; +- return; +- } +- +- fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); ++ fx_parser_read_unstructured(parser, &data_size, shader->offset, sizeof(data_size)); + if (data_size) +- data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); ++ data = fx_parser_get_unstructured_ptr(parser, shader->offset + 4, data_size); + + if (!data) + return; +@@ -3283,7 +3259,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) + { + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, +- "Failed to disassemble shader blob.\n"); ++ "Failed to disassemble shader blob."); + return; + } + parse_fx_print_indent(parser); +@@ -3307,26 +3283,58 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + + 
parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "}"); +- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) ++ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && shader->sodecl[0]) + { + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", +- fx_4_get_string(parser, gs_so.sodecl)); ++ fx_4_get_string(parser, shader->sodecl[0])); + } + else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) + { +- for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) ++ for (unsigned int i = 0; i < ARRAY_SIZE(shader->sodecl); ++i) + { +- if (shader5.sodecl[i]) ++ if (shader->sodecl[i]) + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", +- i, fx_4_get_string(parser, shader5.sodecl[i])); ++ i, fx_4_get_string(parser, shader->sodecl[i])); + } +- if (shader5.sodecl_count) +- vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); ++ if (shader->sodecl_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader->rast_stream); + } + + vkd3d_shader_free_shader_code(&output); + } + ++static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) ++{ ++ struct fx_5_shader shader = { 0 }; ++ ++ switch (object_type) ++ { ++ case FX_4_OBJECT_TYPE_PIXEL_SHADER: ++ case FX_4_OBJECT_TYPE_VERTEX_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: ++ shader.offset = fx_parser_read_u32(parser); ++ break; ++ ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: ++ shader.offset = fx_parser_read_u32(parser); ++ shader.sodecl[0] = fx_parser_read_u32(parser); ++ break; ++ ++ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: ++ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: ++ case FX_5_OBJECT_TYPE_HULL_SHADER: ++ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: ++ fx_parser_read_u32s(parser, &shader, sizeof(shader)); ++ break; ++ ++ default: ++ parser->failed = true; ++ return; ++ } ++ ++ 
fx_4_parse_shader_blob(parser, object_type, &shader); ++} ++ + static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) + { + switch (type->typeinfo) +@@ -3390,6 +3398,8 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + [FX_UINT8] = "byte", + }; + const struct rhs_named_value *named_value; ++ struct fx_5_shader shader = { 0 }; ++ unsigned int shader_type = 0; + uint32_t i, j, comp_count; + struct fx_4_state *state; + +@@ -3400,7 +3410,7 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), + sizeof(*fx_4_states), fx_4_state_id_compare))) + { +- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.", entry.id); + break; + } + +@@ -3486,9 +3496,38 @@ static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32 + vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), + fx_4_get_string(parser, index.index)); + break; ++ case FX_4_ASSIGNMENT_INLINE_SHADER: ++ case FX_5_ASSIGNMENT_INLINE_SHADER: ++ { ++ bool shader5 = entry.type == FX_5_ASSIGNMENT_INLINE_SHADER; ++ ++ if (shader5) ++ fx_parser_read_unstructured(parser, &shader, entry.value, sizeof(shader)); ++ else ++ fx_parser_read_unstructured(parser, &shader, entry.value, 2 * sizeof(uint32_t)); ++ ++ if (state->type == FX_PIXELSHADER) ++ shader_type = FX_4_OBJECT_TYPE_PIXEL_SHADER; ++ else if (state->type == FX_VERTEXSHADER) ++ shader_type = FX_4_OBJECT_TYPE_VERTEX_SHADER; ++ else if (state->type == FX_GEOMETRYSHADER) ++ shader_type = shader5 ? 
FX_5_OBJECT_TYPE_GEOMETRY_SHADER : FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO; ++ else if (state->type == FX_HULLSHADER) ++ shader_type = FX_5_OBJECT_TYPE_HULL_SHADER; ++ else if (state->type == FX_DOMAINSHADER) ++ shader_type = FX_5_OBJECT_TYPE_DOMAIN_SHADER; ++ else if (state->type == FX_COMPUTESHADER) ++ shader_type = FX_5_OBJECT_TYPE_COMPUTE_SHADER; ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_start_indent(parser); ++ fx_4_parse_shader_blob(parser, shader_type, &shader); ++ parse_fx_end_indent(parser); ++ break; ++ } + default: + fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, +- "Unsupported assignment type %u.\n", entry.type); ++ "Unsupported assignment type %u.", entry.type); + } + vkd3d_string_buffer_printf(&parser->buffer, ";\n"); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 96de18dc886..97c6c0a1377 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -287,6 +287,7 @@ bool hlsl_type_is_shader(const struct hlsl_type *type) + case HLSL_CLASS_UAV: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_VOID: + case HLSL_CLASS_NULL: + return false; +@@ -434,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -525,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_STREAM_OUTPUT: + break; + } + vkd3d_unreachable(); +@@ -680,6 +683,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + 
vkd3d_unreachable(); + } + type = next_type; +@@ -898,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba + return type; + } + ++struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, ++ enum hlsl_so_object_type so_type, struct hlsl_type *data_type) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) ++ return NULL; ++ type->class = HLSL_CLASS_STREAM_OUTPUT; ++ type->e.so.so_type = so_type; ++ type->e.so.type = data_type; ++ ++ list_add_tail(&ctx->types, &type->entry); ++ ++ return type; ++} ++ + struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + struct hlsl_struct_field *fields, size_t field_count) + { +@@ -1086,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_STREAM_OUTPUT: + break; + } + +@@ -1157,6 +1178,11 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + ++ case HLSL_CLASS_STREAM_OUTPUT: ++ if (t1->e.so.so_type != t2->e.so.so_type) ++ return false; ++ return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); ++ + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: +@@ -1695,22 +1721,6 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * + return &s->node; + } + +-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, +- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_vsir_instruction_ref *vsir_instr; +- +- if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) +- return NULL; +- init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); +- 
vsir_instr->vsir_instr_idx = vsir_instr_idx; +- +- if (reg) +- vsir_instr->node.reg = *reg; +- +- return &vsir_instr->node; +-} +- + struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) + { +@@ -2533,9 +2543,6 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + + case HLSL_IR_STATEBLOCK_CONSTANT: + return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_unreachable(); + } + + vkd3d_unreachable(); +@@ -2836,6 +2843,20 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + vkd3d_string_buffer_printf(string, ""); + return string; + ++ case HLSL_CLASS_STREAM_OUTPUT: ++ if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) ++ vkd3d_string_buffer_printf(string, "PointStream"); ++ else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) ++ vkd3d_string_buffer_printf(string, "LineStream"); ++ else ++ vkd3d_string_buffer_printf(string, "TriangleStream"); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } ++ return string; ++ + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: +@@ -2968,7 +2989,6 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", + [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", + [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", +- [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", + }; + + if (type >= ARRAY_SIZE(names)) +@@ -3562,11 +3582,6 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + case HLSL_IR_STATEBLOCK_CONSTANT: + dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); + 
break; +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", +- hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); +- break; + } + } + +@@ -3875,10 +3890,6 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + case HLSL_IR_STATEBLOCK_CONSTANT: + free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); + break; +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); +- break; + } + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 075c76cb0e2..25d1b8df947 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -22,7 +22,6 @@ + + #include "vkd3d_shader_private.h" + #include "wine/rbtree.h" +-#include "d3dcommon.h" + #include "d3dx9shader.h" + + /* The general IR structure is inspired by Mesa GLSL hir, even though the code +@@ -105,6 +104,7 @@ enum hlsl_type_class + HLSL_CLASS_GEOMETRY_SHADER, + HLSL_CLASS_CONSTANT_BUFFER, + HLSL_CLASS_BLEND_STATE, ++ HLSL_CLASS_STREAM_OUTPUT, + HLSL_CLASS_VOID, + HLSL_CLASS_NULL, + HLSL_CLASS_ERROR, +@@ -142,6 +142,13 @@ enum hlsl_sampler_dim + /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ + }; + ++enum hlsl_so_object_type ++{ ++ HLSL_STREAM_OUTPUT_POINT_STREAM, ++ HLSL_STREAM_OUTPUT_LINE_STREAM, ++ HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, ++}; ++ + enum hlsl_regset + { + HLSL_REGSET_SAMPLERS, +@@ -220,6 +227,12 @@ struct hlsl_type + } resource; + /* Additional field to distinguish object types. Currently used only for technique types. */ + unsigned int version; ++ /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */ ++ struct ++ { ++ struct hlsl_type *type; ++ enum hlsl_so_object_type so_type; ++ } so; + } e; + + /* Number of numeric register components used by one value of this type, for each regset. 
+@@ -330,8 +343,6 @@ enum hlsl_ir_node_type + HLSL_IR_COMPILE, + HLSL_IR_SAMPLER_STATE, + HLSL_IR_STATEBLOCK_CONSTANT, +- +- HLSL_IR_VSIR_INSTRUCTION_REF, + }; + + /* Common data for every type of IR instruction node. */ +@@ -934,16 +945,6 @@ struct hlsl_ir_stateblock_constant + char *name; + }; + +-/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. +- * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ +-struct hlsl_ir_vsir_instruction_ref +-{ +- struct hlsl_ir_node node; +- +- /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ +- unsigned int vsir_instr_idx; +-}; +- + struct hlsl_scope + { + /* Item entry for hlsl_ctx.scopes. */ +@@ -1259,12 +1260,6 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co + return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); + } + +-static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) +-{ +- VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); +- return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); +-} +- + static inline void hlsl_block_init(struct hlsl_block *block) + { + list_init(&block->instrs); +@@ -1519,6 +1514,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); ++struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, ++ enum hlsl_so_object_type so_type, struct hlsl_type *type); + struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node 
*arg3); + +@@ -1588,9 +1585,6 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned + struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, + struct list *cases, const struct vkd3d_shader_location *loc); + +-struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, +- struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); +- + void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); + void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index 8dace11916a..31fb30521e9 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -104,6 +104,7 @@ if {return KW_IF; } + in {return KW_IN; } + inline {return KW_INLINE; } + inout {return KW_INOUT; } ++LineStream {return KW_LINESTREAM; } + linear {return KW_LINEAR; } + matrix {return KW_MATRIX; } + namespace {return KW_NAMESPACE; } +@@ -114,6 +115,7 @@ out {return KW_OUT; } + packoffset {return KW_PACKOFFSET; } + pass {return KW_PASS; } + PixelShader {return KW_PIXELSHADER; } ++PointStream {return KW_POINTSTREAM; } + pixelshader {return KW_PIXELSHADER; } + RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } + RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } +@@ -170,6 +172,7 @@ texture3D {return KW_TEXTURE3D; } + TextureCube {return KW_TEXTURECUBE; } + textureCUBE {return KW_TEXTURECUBE; } + TextureCubeArray {return KW_TEXTURECUBEARRAY; } ++TriangleStream {return KW_TRIANGLESTREAM; } + true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } + unsigned {return KW_UNSIGNED; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 
60aade732db..5bcd5e9034b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -678,8 +678,6 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + break; +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vkd3d_unreachable(); + } + } + +@@ -6553,6 +6551,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct hlsl_semantic semantic; + enum hlsl_buffer_type buffer_type; + enum hlsl_sampler_dim sampler_dim; ++ enum hlsl_so_object_type so_type; + struct hlsl_attribute *attr; + struct parse_attribute_list attr_list; + struct hlsl_ir_switch_case *switch_case; +@@ -6596,6 +6595,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_INLINE + %token KW_INOUT + %token KW_LINEAR ++%token KW_LINESTREAM + %token KW_MATRIX + %token KW_NAMESPACE + %token KW_NOINTERPOLATION +@@ -6605,6 +6605,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER ++%token KW_POINTSTREAM + %token KW_RASTERIZERORDEREDBUFFER + %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER + %token KW_RASTERIZERORDEREDTEXTURE1D +@@ -6654,6 +6655,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + %token KW_TEXTURE3D + %token KW_TEXTURECUBE + %token KW_TEXTURECUBEARRAY ++%token KW_TRIANGLESTREAM + %token KW_TRUE + %token KW_TYPEDEF + %token KW_UNSIGNED +@@ -6784,6 +6786,8 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + + %type semantic + ++%type so_type ++ + %type state_block + + %type state_block_index_opt +@@ -7805,6 +7809,20 @@ rov_type: + $$ = HLSL_SAMPLER_DIM_3D; + } + ++so_type: ++ KW_POINTSTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; ++ } ++ | KW_LINESTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; ++ 
} ++ | KW_TRIANGLESTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; ++ } ++ + resource_format: + var_modifiers type + { +@@ -7948,6 +7966,10 @@ type_no_void: + validate_uav_type(ctx, $1, $3, &@4); + $$ = hlsl_new_uav_type(ctx, $1, $3, true); + } ++ | so_type '<' type '>' ++ { ++ $$ = hlsl_new_stream_output_type(ctx, $1, $3); ++ } + | KW_RWBYTEADDRESSBUFFER + { + $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index d11ff481f6b..8f45628dbee 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -19,6 +19,7 @@ + */ + + #include "hlsl.h" ++#include "vkd3d_shader_private.h" + #include + #include + +@@ -1678,6 +1679,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + +@@ -4162,9 +4164,6 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ +- vkd3d_unreachable(); + } + + return false; +@@ -4304,9 +4303,6 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. 
*/ +- vkd3d_unreachable(); + + case HLSL_IR_STORE: + { +@@ -4494,6 +4490,9 @@ struct register_allocator + + /* Two allocations with different mode can't share the same register. */ + int mode; ++ /* If an allocation is VIP, no new allocations can be made in the ++ * register unless they are VIP as well. */ ++ bool vip; + } *allocations; + size_t count, capacity; + +@@ -4513,7 +4512,7 @@ struct register_allocator + }; + + static unsigned int get_available_writemask(const struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) ++ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip) + { + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i; +@@ -4532,6 +4531,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all + writemask &= ~allocation->writemask; + if (allocation->mode != mode) + writemask = 0; ++ if (allocation->vip && !vip) ++ writemask = 0; + } + + if (!writemask) +@@ -4542,7 +4543,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all + } + + static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, +- unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) ++ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip) + { + struct allocation *allocation; + +@@ -4556,16 +4557,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a + allocation->first_write = first_write; + allocation->last_read = last_read; + allocation->mode = mode; ++ allocation->vip = vip; + + allocator->reg_count = max(allocator->reg_count, reg_idx + 1); + } + +-/* reg_size is the number of register components to be reserved, while component_count is the number +- * of components for the register's writemask. 
In SM1, floats and vectors allocate the whole +- * register, even if they don't use it completely. */ ++/* Allocates a register (or some components of it) within the register allocator. ++ * 'reg_size' is the number of register components to be reserved. ++ * 'component_count' is the number of components for the hlsl_reg's ++ * writemask, which can be smaller than 'reg_size'. For instance, sm1 ++ * floats and vectors allocate the whole register even if they are not ++ * using all components. ++ * 'mode' can be provided to avoid allocating on a register that already has an ++ * allocation with a different mode. ++ * 'force_align' can be used so that the allocation always start in '.x'. ++ * 'vip' can be used so that no new allocations can be made in the given register ++ * unless they are 'vip' as well. */ + static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, unsigned int reg_size, +- unsigned int component_count, int mode, bool force_align) ++ unsigned int component_count, int mode, bool force_align, bool vip) + { + struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; + unsigned int required_size = force_align ? 
4 : reg_size; +@@ -4579,7 +4589,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) + { + unsigned int available_writemask = get_available_writemask(allocator, +- first_write, last_read, reg_idx, mode); ++ first_write, last_read, reg_idx, mode, vip); + + if (vkd3d_popcount(available_writemask) >= pref) + { +@@ -4589,7 +4599,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + ret.id = reg_idx; + ret.writemask = hlsl_combine_writemasks(writemask, + vkd3d_write_mask_from_component_count(component_count)); +- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); ++ ++ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip); + return ret; + } + } +@@ -4598,13 +4609,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + ret.id = allocator->reg_count; + ret.writemask = vkd3d_write_mask_from_component_count(component_count); + record_allocation(ctx, allocator, allocator->reg_count, +- vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); ++ vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip); + return ret; + } + + /* Allocate a register with writemask, while reserving reg_writemask. 
*/ +-static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) ++static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, ++ struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, ++ uint32_t reg_writemask, uint32_t writemask, int mode, bool vip) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4614,11 +4626,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + for (reg_idx = 0;; ++reg_idx) + { + if ((get_available_writemask(allocator, first_write, last_read, +- reg_idx, mode) & reg_writemask) == reg_writemask) ++ reg_idx, mode, vip) & reg_writemask) == reg_writemask) + break; + } + +- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); ++ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip); + + ret.id = reg_idx; + ret.allocation_size = 1; +@@ -4628,7 +4640,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + } + + static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, +- unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) ++ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip) + { + unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; + unsigned int writemask; +@@ -4636,18 +4648,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig + + for (i = 0; i < (reg_size / 4); ++i) + { +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip); + if (writemask != VKD3DSP_WRITEMASK_ALL) + return false; + } +- writemask = 
get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip); + if ((writemask & last_reg_mask) != last_reg_mask) + return false; + return true; + } + + static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) ++ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4655,15 +4667,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + + for (reg_idx = 0;; ++reg_idx) + { +- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) ++ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip)) + break; + } + + for (i = 0; i < reg_size / 4; ++i) +- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); ++ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip); + if (reg_size % 4) + record_allocation(ctx, allocator, reg_idx + (reg_size / 4), +- (1u << (reg_size % 4)) - 1, first_write, last_read, mode); ++ (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip); + + ret.id = reg_idx; + ret.allocation_size = align(reg_size, 4) / 4; +@@ -4679,9 +4691,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ + + if (type->class <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); ++ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false); + else +- return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); ++ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false); + } + + static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +@@ -4859,8 +4871,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + } + + if (reg_writemask) +- instr->reg = allocate_register_with_masks(ctx, allocator, +- instr->index, instr->last_read, reg_writemask, dst_writemask, 0); ++ instr->reg = allocate_register_with_masks(ctx, allocator, instr->index, ++ instr->last_read, reg_writemask, dst_writemask, 0, false); + else + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, + instr->index, instr->last_read, instr->data_type); +@@ -5181,14 +5193,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + { + if (i < bind_count) + { +- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) ++ if (get_available_writemask(&allocator_used, 1, UINT_MAX, ++ reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } +- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); ++ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + } +- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); ++ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false); + } + + var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; +@@ -5211,7 
+5224,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } +@@ -5254,7 +5267,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun + var = entry_func->parameters.vars[i]; + if (var->is_output_semantic) + { +- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); ++ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, ++ var->first_write, var->last_read, 0, false); + break; + } + } +@@ -5311,6 +5325,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + enum vkd3d_shader_register_type type; + struct vkd3d_shader_version version; ++ bool special_interpolation = false; ++ bool vip_allocation = false; + uint32_t reg; + bool builtin; + +@@ -5363,6 +5379,14 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + * domains, it is allocated as if it was 'float[1]'. */ + var->force_align = true; + } ++ ++ if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX ++ || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX ++ || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID) ++ vip_allocation = true; ++ ++ if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX) ++ special_interpolation = true; + } + + if (builtin) +@@ -5376,8 +5400,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); + unsigned int reg_size = optimize ? 
var->data_type->dimx : 4; + +- var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, +- UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); ++ if (special_interpolation) ++ mode = VKD3DSIM_NONE; ++ ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX, ++ reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation); + + TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v', + var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); +@@ -6812,7 +6839,7 @@ static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src + } + + static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, +- struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) ++ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) + { + struct hlsl_ir_constant *constant; + +@@ -6832,6 +6859,242 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, + } + } + ++static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) ++{ ++ const struct hlsl_ir_var *var = deref->var; ++ unsigned int offset_const_deref; ++ ++ reg->type = var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; ++ reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ ++ if (!var->indexable) ++ { ++ offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx[0].offset += offset_const_deref / 4; ++ reg->idx_count = 1; ++ } ++ else ++ { ++ offset_const_deref = deref->const_offset; ++ reg->idx[1].offset = offset_const_deref / 4; ++ reg->idx_count = 2; ++ ++ if (deref->rel_offset.node) ++ { ++ struct vkd3d_shader_src_param *idx_src; ++ ++ if (!(idx_src = vsir_program_get_src_params(program, 1))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return false; ++ } ++ memset(idx_src, 0, sizeof(*idx_src)); ++ reg->idx[1].rel_addr = idx_src; ++ ++ vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ } ++ ++ *writemask = 0xf & (0xf << (offset_const_deref % 4)); ++ if (var->regs[HLSL_REGSET_NUMERIC].writemask) ++ *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); ++ return true; ++} ++ ++static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); ++ const struct hlsl_ir_var *var = deref->var; ++ ++ if (var->is_uniform) ++ { ++ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); ++ ++ if (regset == HLSL_REGSET_TEXTURES) ++ { ++ reg->type = VKD3DSPR_RESOURCE; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = 
var->regs[HLSL_REGSET_TEXTURES].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else if (regset == HLSL_REGSET_UAVS) ++ { ++ reg->type = VKD3DSPR_UAV; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else if (regset == HLSL_REGSET_SAMPLERS) ++ { ++ reg->type = VKD3DSPR_SAMPLER; ++ reg->dimension = VSIR_DIMENSION_NONE; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; ++ ++ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); ++ reg->type = VKD3DSPR_CONSTBUFFER; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ ++ reg->idx[2].offset = offset / 4; ++ reg->idx_count = 3; ++ } ++ else ++ { ++ reg->idx[0].offset = var->buffer->reg.index; ++ reg->idx[1].offset = offset / 4; ++ 
reg->idx_count = 2; ++ } ++ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); ++ } ++ } ++ else if (var->is_input_semantic) ++ { ++ bool has_idx; ++ ++ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ ++ if (has_idx) ++ { ++ reg->idx[0].offset = var->semantic.index + offset / 4; ++ reg->idx_count = 1; ++ } ++ ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ } ++ else ++ { ++ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); ++ ++ VKD3D_ASSERT(hlsl_reg.allocated); ++ ++ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ reg->type = VKD3DSPR_PATCHCONST; ++ else ++ reg->type = VKD3DSPR_INPUT; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ reg->idx[0].offset = hlsl_reg.id; ++ reg->idx_count = 1; ++ *writemask = hlsl_reg.writemask; ++ } ++ } ++ else if (var->is_output_semantic) ++ { ++ bool has_idx; ++ ++ if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx)) ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ ++ if (has_idx) ++ { ++ reg->idx[0].offset = var->semantic.index + offset / 4; ++ reg->idx_count = 1; ++ } ++ ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ } ++ else ++ { ++ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); ++ ++ VKD3D_ASSERT(hlsl_reg.allocated); ++ reg->type = VKD3DSPR_OUTPUT; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ reg->idx[0].offset = hlsl_reg.id; ++ reg->idx_count = 1; ++ *writemask = hlsl_reg.writemask; ++ } ++ } ++ else ++ { ++ return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); ++ } ++ return true; ++} ++ 
++static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, ++ unsigned int dst_writemask, const struct vkd3d_shader_location *loc) ++{ ++ uint32_t writemask; ++ ++ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) ++ return false; ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ return true; ++} ++ ++static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, ++ const struct vkd3d_shader_location *loc, unsigned int writemask) ++{ ++ uint32_t reg_writemask; ++ ++ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, ®_writemask, deref)) ++ return false; ++ dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); ++ return true; ++} ++ + static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, + struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) + { +@@ -7059,13 +7322,10 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + +- case HLSL_TYPE_BOOL: +- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); +- break; +- + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); + break; +@@ -7659,40 +7919,6 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + sm1_generate_vsir_block(ctx, &entry_func->body, program); + } + +-static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) +-{ +- struct vkd3d_shader_location *loc; +- struct hlsl_ir_node *vsir_instr; +- +- loc = 
&program->instructions.elements[program->instructions.count - 1].location; +- +- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) +- { +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; +- } +- hlsl_block_add_instr(block, vsir_instr); +-} +- +-static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, +- struct vsir_program *program, struct hlsl_ir_node *instr) +-{ +- struct vkd3d_shader_location *loc; +- struct hlsl_ir_node *vsir_instr; +- +- loc = &program->instructions.elements[program->instructions.count - 1].location; +- +- if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, +- program->instructions.count - 1, instr->data_type, &instr->reg, loc))) +- { +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; +- } +- +- list_add_before(&instr->entry, &vsir_instr->entry); +- hlsl_replace_node(instr, vsir_instr); +-} +- + static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, + const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) +@@ -7806,8 +8032,6 @@ static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vs + + if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL) + ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); +- +- add_last_vsir_instr_to_block(ctx, program, block); + } + + static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, +@@ -7819,8 +8043,6 @@ static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_ + return; + + ins->declaration.count = temp_count; +- +- add_last_vsir_instr_to_block(ctx, program, block); + } + + static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, +@@ -7838,8 +8060,6 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, + 
ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; + ins->declaration.indexable_temp.component_count = comp_count; + ins->declaration.indexable_temp.has_function_scope = false; +- +- add_last_vsir_instr_to_block(ctx, program, block); + } + + static bool type_is_float(const struct hlsl_type *type) +@@ -8505,99 +8725,730 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, + } + } + +-static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_store *store) + { +- struct vkd3d_string_buffer *dst_type_string; +- struct hlsl_ir_node *instr, *next; +- struct hlsl_ir_switch_case *c; ++ struct hlsl_ir_node *instr = &store->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; + +- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) +- { +- if (instr->data_type) +- { +- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) +- { +- hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); +- break; +- } +- } ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return false; + +- switch (instr->type) +- { +- case HLSL_IR_CALL: +- vkd3d_unreachable(); ++ dst_param = &ins->dst[0]; ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ dst_param, &store->lhs, &instr->loc, store->writemask)) ++ return false; + +- case HLSL_IR_CONSTANT: +- /* In SM4 all constants are inlined. 
*/ +- break; ++ src_param = &ins->src[0]; ++ vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); + +- case HLSL_IR_EXPR: +- if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) +- break; ++ return true; ++} + +- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) +- replace_instr_with_last_vsir_instr(ctx, program, instr); ++/* Does this variable's data come directly from the API user, rather than ++ * being temporary or from a previous shader stage? I.e. is it a uniform or ++ * VS input? */ ++static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) ++{ ++ if (var->is_uniform) ++ return true; + +- hlsl_release_string_buffer(ctx, dst_type_string); +- break; ++ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; ++} + +- case HLSL_IR_IF: +- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); +- sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); +- break; ++static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_type *type = load->node.data_type; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ struct hlsl_constant_value value; + +- case HLSL_IR_LOOP: +- sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); +- break; ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); ++ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) ++ { ++ /* Uniform bools can be specified as anything, but internal bools ++ * always have 0 for false and ~0 for true. Normalise that here. 
*/ + +- case HLSL_IR_SWITCH: +- LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) +- sm4_generate_vsir_block(ctx, &c->body, program); +- break; ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) ++ return false; + +- case HLSL_IR_SWIZZLE: +- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); +- replace_instr_with_last_vsir_instr(ctx, program, instr); +- break; ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); + +- default: +- break; +- } ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) ++ return false; ++ ++ memset(&value, 0xff, sizeof(value)); ++ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, ++ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ memset(&value, 0x00, sizeof(value)); ++ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, ++ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ } ++ else ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return false; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) ++ return false; + } ++ return true; + } + +-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, +- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) ++static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_resource_store *store) + { +- bool is_patch_constant_func = func == ctx->patch_constant_func; +- struct hlsl_block block = {0}; +- struct hlsl_scope *scope; +- struct hlsl_ir_var *var; +- uint32_t temp_count; +- +- compute_liveness(ctx, func); +- 
mark_indexable_vars(ctx, func); +- temp_count = allocate_temp_registers(ctx, func); +- if (ctx->result) +- return; +- program->temp_count = max(program->temp_count, temp_count); ++ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); ++ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; ++ struct hlsl_ir_node *instr = &store->node; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int writemask; + +- hlsl_block_init(&block); ++ if (!store->resource.var->is_uniform) ++ { ++ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); ++ return false; ++ } + +- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { +- if ((var->is_input_semantic && var->last_read) +- || (var->is_output_semantic && var->first_write)) +- sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); ++ return false; + } + +- if (temp_count) +- sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) ++ return false; + +- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ &ins->dst[0], &store->resource, &instr->loc, writemask)) ++ return false; ++ } ++ else + { +- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) +- { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) ++ return false; ++ ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, 
++ &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) ++ return false; ++ } ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ ++ return true; ++} ++ ++static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset) ++{ ++ struct hlsl_ir_constant *offset; ++ ++ VKD3D_ASSERT(texel_offset); ++ if (texel_offset->type != HLSL_IR_CONSTANT) ++ return false; ++ offset = hlsl_ir_constant(texel_offset); ++ ++ if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7) ++ return false; ++ if (offset->node.data_type->dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7)) ++ return false; ++ if (offset->node.data_type->dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7)) ++ return false; ++ return true; ++} ++ ++static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi( ++ struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset) ++{ ++ struct hlsl_ir_constant *offset; ++ ++ if (!texel_offset) ++ return; ++ offset = hlsl_ir_constant(texel_offset); ++ ++ ins->texel_offset.u = offset->value.u[0].i; ++ ins->texel_offset.v = 0; ++ ins->texel_offset.w = 0; ++ if (offset->node.data_type->dimx > 1) ++ ins->texel_offset.v = offset->value.u[1].i; ++ if (offset->node.data_type->dimx > 2) ++ ins->texel_offset.w = offset->value.u[2].i; ++} ++ ++static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource); ++ bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS); ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; ++ const struct hlsl_ir_node *sample_index = load->sample_index.node; ++ const 
struct hlsl_ir_node *texel_offset = load->texel_offset.node; ++ const struct hlsl_ir_node *coords = load->coords.node; ++ unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *instr = &load->node; ++ enum hlsl_sampler_dim dim = load->sampling_dim; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ bool multisampled; ++ ++ VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD); ++ ++ multisampled = resource_type->class == HLSL_CLASS_TEXTURE ++ && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); ++ ++ if (uav) ++ opcode = VKD3DSIH_LD_UAV_TYPED; ++ else if (raw) ++ opcode = VKD3DSIH_LD_RAW; ++ else ++ opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled))) ++ return false; ++ ++ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) ++ { ++ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7."); ++ return false; ++ } ++ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ if (!uav) ++ { ++ /* Mipmap level is in the last component in the IR, but needs to be in ++ * the W component in the instruction. 
*/ ++ unsigned int dim_count = hlsl_sampler_dim_count(dim); ++ ++ if (dim_count == 1) ++ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; ++ if (dim_count == 2) ++ coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; ++ } ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (multisampled) ++ { ++ if (sample_index->type == HLSL_IR_CONSTANT) ++ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, ++ &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0); ++ else if (version->major == 4 && version->minor == 0) ++ hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); ++ else ++ vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; ++ const struct hlsl_ir_node *coords = load->coords.node; ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_deref *sampler = &load->sampler; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ unsigned int src_count; ++ ++ switch (load->load_type) ++ { ++ case HLSL_RESOURCE_SAMPLE: ++ opcode = VKD3DSIH_SAMPLE; ++ src_count = 3; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_CMP: ++ opcode = VKD3DSIH_SAMPLE_C; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_CMP_LZ: ++ opcode = VKD3DSIH_SAMPLE_C_LZ; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_LOD: ++ opcode = VKD3DSIH_SAMPLE_LOD; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ opcode = 
VKD3DSIH_SAMPLE_B; ++ src_count = 4; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ opcode = VKD3DSIH_SAMPLE_GRAD; ++ src_count = 5; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count))) ++ return false; ++ ++ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) ++ { ++ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7."); ++ return false; ++ } ++ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], ++ resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2], ++ sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ++ if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B) ++ { ++ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ) ++ { ++ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ else if (opcode == VKD3DSIH_SAMPLE_GRAD) ++ { ++ vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program, ++ const struct hlsl_ir_resource_load *load, uint32_t swizzle) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_ir_node *texel_offset = load->texel_offset.node; ++ 
const struct hlsl_ir_node *coords = load->coords.node; ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_deref *sampler = &load->sampler; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_opcode opcode; ++ ++ opcode = VKD3DSIH_GATHER4; ++ if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset)) ++ { ++ if (!vkd3d_shader_ver_ge(version, 5, 0)) ++ { ++ hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); ++ return false; ++ } ++ opcode = VKD3DSIH_GATHER4_PO; ++ } ++ ++ if (opcode == VKD3DSIH_GATHER4) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 3))) ++ return false; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ins->src[2].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[2].swizzle = swizzle; ++ } ++ else if (opcode == VKD3DSIH_GATHER4_PO) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 4))) ++ return false; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[2], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ if 
(!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[3], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc)) ++ return false; ++ ins->src[3].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[3].swizzle = swizzle; ++ } ++ else ++ { ++ vkd3d_unreachable(); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct hlsl_type *type = instr->data_type; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) ++ return false; ++ ++ if (type->e.numeric.type == HLSL_TYPE_UINT) ++ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *instr = &load->node; ++ struct hlsl_type *type = instr->data_type; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER ++ || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ { ++ hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers."); ++ return false; ++ } ++ ++ VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2))) ++ return 
false; ++ ++ if (type->e.numeric.type == HLSL_TYPE_UINT) ++ ins->flags = VKD3DSI_RESINFO_UINT; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc)) ++ return false; ++ ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_resource_load *load) ++{ ++ if (load->sampler.var && !load->sampler.var->is_uniform) ++ { ++ hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable."); ++ return false; ++ } ++ ++ if (!load->resource.var->is_uniform) ++ { ++ hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable."); ++ return false; ++ } ++ ++ switch (load->load_type) ++ { ++ case HLSL_RESOURCE_LOAD: ++ return sm4_generate_vsir_instr_ld(ctx, program, load); ++ ++ case HLSL_RESOURCE_SAMPLE: ++ case HLSL_RESOURCE_SAMPLE_CMP: ++ case HLSL_RESOURCE_SAMPLE_CMP_LZ: ++ case HLSL_RESOURCE_SAMPLE_LOD: ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ /* Combined sample expressions were lowered. 
*/ ++ VKD3D_ASSERT(load->sampler.var); ++ return sm4_generate_vsir_instr_sample(ctx, program, load); ++ ++ case HLSL_RESOURCE_GATHER_RED: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(X, X, X, X)); ++ ++ case HLSL_RESOURCE_GATHER_GREEN: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y)); ++ ++ case HLSL_RESOURCE_GATHER_BLUE: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z)); ++ ++ case HLSL_RESOURCE_GATHER_ALPHA: ++ return sm4_generate_vsir_instr_gather(ctx, program, load, VKD3D_SHADER_SWIZZLE(W, W, W, W)); ++ ++ case HLSL_RESOURCE_SAMPLE_INFO: ++ return sm4_generate_vsir_instr_sample_info(ctx, program, load); ++ ++ case HLSL_RESOURCE_RESINFO: ++ return sm4_generate_vsir_instr_resinfo(ctx, program, load); ++ ++ case HLSL_RESOURCE_SAMPLE_PROJ: ++ vkd3d_unreachable(); ++ ++ default: ++ return false; ++ } ++} ++ ++static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_jump *jump) ++{ ++ const struct hlsl_ir_node *instr = &jump->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ switch (jump->type) ++ { ++ case HLSL_IR_JUMP_BREAK: ++ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0); ++ ++ case HLSL_IR_JUMP_CONTINUE: ++ return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0); ++ ++ case HLSL_IR_JUMP_DISCARD_NZ: ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1))) ++ return false; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL); ++ return true; ++ ++ case HLSL_IR_JUMP_RETURN: ++ vkd3d_unreachable(); ++ ++ default: ++ hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ return false; ++ } ++} ++ ++static void 
sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program); ++ ++static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff) ++{ ++ struct hlsl_ir_node *instr = &iff->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1))) ++ return; ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ; ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL); ++ ++ sm4_generate_vsir_block(ctx, &iff->then_block, program); ++ ++ if (!list_empty(&iff->else_block.instrs)) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0))) ++ return; ++ sm4_generate_vsir_block(ctx, &iff->else_block, program); ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0))) ++ return; ++} ++ ++static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_loop *loop) ++{ ++ struct hlsl_ir_node *instr = &loop->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0))) ++ return; ++ ++ sm4_generate_vsir_block(ctx, &loop->body, program); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0))) ++ return; ++} ++ ++static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_switch *swi) ++{ ++ const struct hlsl_ir_node *selector = swi->selector.node; ++ struct hlsl_ir_node *instr = &swi->node; ++ struct vkd3d_shader_instruction *ins; ++ struct hlsl_ir_switch_case *cas; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1))) ++ 
return; ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL); ++ ++ LIST_FOR_EACH_ENTRY(cas, &swi->cases, struct hlsl_ir_switch_case, entry) ++ { ++ if (cas->is_default) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0))) ++ return; ++ } ++ else ++ { ++ struct hlsl_constant_value value = {.u[0].u = cas->value}; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1))) ++ return; ++ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL); ++ } ++ ++ sm4_generate_vsir_block(ctx, &cas->body, program); ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0))) ++ return; ++} ++ ++static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++{ ++ struct vkd3d_string_buffer *dst_type_string; ++ struct hlsl_ir_node *instr, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->data_type) ++ { ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); ++ break; ++ } ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ /* In SM4 all constants are inlined. 
*/ ++ break; ++ ++ case HLSL_IR_EXPR: ++ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) ++ break; ++ sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer); ++ hlsl_release_string_buffer(ctx, dst_type_string); ++ break; ++ ++ case HLSL_IR_IF: ++ sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr)); ++ break; ++ ++ case HLSL_IR_LOAD: ++ sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr)); ++ break; ++ ++ case HLSL_IR_LOOP: ++ sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr)); ++ break; ++ ++ case HLSL_IR_RESOURCE_LOAD: ++ sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr)); ++ break; ++ ++ case HLSL_IR_RESOURCE_STORE: ++ sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr)); ++ break; ++ ++ case HLSL_IR_JUMP: ++ sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr)); ++ break; ++ ++ case HLSL_IR_STORE: ++ sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr)); ++ break; ++ ++ case HLSL_IR_SWITCH: ++ sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr)); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); ++ break; ++ ++ default: ++ break; ++ } ++ } ++} ++ ++static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) ++{ ++ bool is_patch_constant_func = func == ctx->patch_constant_func; ++ struct hlsl_block block = {0}; ++ struct hlsl_scope *scope; ++ struct hlsl_ir_var *var; ++ uint32_t temp_count; ++ ++ compute_liveness(ctx, func); ++ mark_indexable_vars(ctx, func); ++ temp_count = allocate_temp_registers(ctx, func); ++ if (ctx->result) ++ return; ++ program->temp_count = max(program->temp_count, temp_count); ++ ++ hlsl_block_init(&block); ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if 
((var->is_input_semantic && var->last_read) ++ || (var->is_output_semantic && var->first_write)) ++ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ } ++ ++ if (temp_count) ++ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { + if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) + continue; + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) +@@ -8618,6 +9469,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + hlsl_block_cleanup(&block); + + sm4_generate_vsir_block(ctx, &func->body, program); ++ ++ generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0); + } + + /* OBJECTIVE: Translate all the information from ctx and entry_func to the +@@ -8649,9 +9502,16 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl + program->thread_group_size.z = ctx->thread_count[2]; + } + ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ generate_vsir_add_program_instruction(ctx, program, ++ &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0); + sm4_generate_vsir_add_function(ctx, func, config_flags, program); + if (version.type == VKD3D_SHADER_TYPE_HULL) ++ { ++ generate_vsir_add_program_instruction(ctx, program, ++ &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0); + sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); ++ } + } + + static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index b0e89bededb..0c06db9ff15 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -201,6 +201,14 @@ static void 
src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 + src->reg.u.immconst_u32[0] = value; + } + ++static void vsir_src_param_init_io(struct vkd3d_shader_src_param *src, ++ enum vkd3d_shader_register_type reg_type, const struct signature_element *e, unsigned int idx_count) ++{ ++ vsir_src_param_init(src, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = vsir_swizzle_from_writemask(e->mask); ++} ++ + void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) + { + vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); +@@ -278,6 +286,14 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader + param->shift = 0; + } + ++static void vsir_dst_param_init_io(struct vkd3d_shader_dst_param *dst, enum vkd3d_shader_register_type reg_type, ++ const struct signature_element *e, unsigned int idx_count) ++{ ++ vsir_dst_param_init(dst, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = e->mask; ++} ++ + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); +@@ -743,6 +759,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + case VKD3DSIH_DCL_GLOBAL_FLAGS: + case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + case VKD3DSIH_DCL_THREAD_GROUP: + case VKD3DSIH_DCL_UAV_TYPED: + vkd3d_shader_instruction_make_nop(ins); +@@ -1369,26 +1386,17 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param + } + } + +-static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e, +- enum vkd3d_shader_register_type reg_type, unsigned int idx_count) +-{ +- param->write_mask = 
e->mask; +- param->modifiers = 0; +- param->shift = 0; +- vsir_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count); +-} +- + static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser, + const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst, + const struct vkd3d_shader_location *location) + { + struct vkd3d_shader_instruction *ins; +- struct vkd3d_shader_dst_param *param; + const struct signature_element *e; +- unsigned int i, count; ++ unsigned int i, count, stride = 0; + +- for (i = 0, count = 1; i < s->element_count; ++i) +- count += !!s->elements[i].used_mask; ++ for (i = 0; i < s->element_count; ++i) ++ stride += !!s->elements[i].used_mask; ++ count = 2 + 3 * stride; + + if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count)) + return VKD3D_ERROR_OUT_OF_MEMORY; +@@ -1399,31 +1407,75 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p + + ins = &normaliser->instructions.elements[dst]; + vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE); +- ins->flags = 1; +- ++ins; ++ ++ ins = &normaliser->instructions.elements[dst + 1 + 3 * stride]; ++ vsir_instruction_init(ins, location, VKD3DSIH_RET); ++ ++ ins = &normaliser->instructions.elements[dst + 1]; + + for (i = 0; i < s->element_count; ++i) + { ++ struct vkd3d_shader_instruction *ins_in, *ins_out, *ins_mov; ++ struct vkd3d_shader_dst_param *param_in, *param_out; ++ + e = &s->elements[i]; + if (!e->used_mask) + continue; + ++ ins_in = ins; ++ ins_out = &ins[stride]; ++ ins_mov = &ins[2 * stride]; ++ + if (e->sysval_semantic != VKD3D_SHADER_SV_NONE) + { +- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV); +- param = &ins->declaration.register_semantic.reg; +- ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); ++ 
vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT_SIV); ++ param_in = &ins_in->declaration.register_semantic.reg; ++ ins_in->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); ++ ++ vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT_SIV); ++ param_out = &ins_out->declaration.register_semantic.reg; ++ ins_out->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic); + } + else + { +- vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT); +- param = &ins->declaration.dst; ++ vsir_instruction_init(ins_in, location, VKD3DSIH_DCL_INPUT); ++ param_in = &ins_in->declaration.dst; ++ ++ vsir_instruction_init(ins_out, location, VKD3DSIH_DCL_OUTPUT); ++ param_out = &ins_out->declaration.dst; + } + +- shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2); +- param->reg.idx[0].offset = input_control_point_count; +- param->reg.idx[1].offset = e->register_index; +- param->write_mask = e->mask; ++ vsir_dst_param_init_io(param_in, VKD3DSPR_INPUT, e, 2); ++ param_in->reg.idx[0].offset = input_control_point_count; ++ param_in->reg.idx[1].offset = e->register_index; ++ param_in->write_mask = e->mask; ++ ++ vsir_dst_param_init_io(param_out, VKD3DSPR_OUTPUT, e, 2); ++ param_out->reg.idx[0].offset = input_control_point_count; ++ param_out->reg.idx[1].offset = e->register_index; ++ param_out->write_mask = e->mask; ++ ++ vsir_instruction_init(ins_mov, location, VKD3DSIH_MOV); ++ ins_mov->dst = shader_dst_param_allocator_get(&normaliser->instructions.dst_params, 1); ++ ins_mov->dst_count = 1; ++ ins_mov->src = shader_src_param_allocator_get(&normaliser->instructions.src_params, 1); ++ ins_mov->src_count = 1; ++ ++ if (!ins_mov->dst || ! 
ins_mov->src) ++ { ++ WARN("Failed to allocate dst/src param.\n"); ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ } ++ ++ vsir_dst_param_init_io(&ins_mov->dst[0], VKD3DSPR_OUTPUT, e, 2); ++ ins_mov->dst[0].reg.idx[0].offset = 0; ++ ins_mov->dst[0].reg.idx[0].rel_addr = normaliser->outpointid_param; ++ ins_mov->dst[0].reg.idx[1].offset = e->register_index; ++ ++ vsir_src_param_init_io(&ins_mov->src[0], VKD3DSPR_INPUT, e, 2); ++ ins_mov->src[0].reg.idx[0].offset = 0; ++ ins_mov->src[0].reg.idx[0].rel_addr = normaliser->outpointid_param; ++ ins_mov->src[0].reg.idx[1].offset = e->register_index; + + ++ins; + } +@@ -2129,6 +2181,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par + + case VKD3DSPR_OUTCONTROLPOINT: + reg->type = VKD3DSPR_OUTPUT; ++ if (io_normaliser_is_in_fork_or_join_phase(normaliser)) ++ normaliser->use_vocp = true; + /* fall through */ + case VKD3DSPR_OUTPUT: + reg_idx = reg->idx[reg->idx_count - 1].offset; +@@ -2179,9 +2233,6 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + { + reg = &ins->declaration.dst.reg; + +- if (reg->type == VKD3DSPR_OUTCONTROLPOINT) +- normaliser->use_vocp = true; +- + /* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their + * equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. 
*/ + if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST) +@@ -6665,6 +6716,9 @@ struct validation_context + enum vkd3d_shader_opcode *blocks; + size_t depth; + size_t blocks_capacity; ++ ++ unsigned int outer_tess_idxs[4]; ++ unsigned int inner_tess_idxs[2]; + }; + + static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, +@@ -7185,6 +7239,10 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_register_without_indices(ctx, reg); + break; + ++ case VKD3DSPR_PRIMID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_NULL: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -7201,6 +7259,18 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_uav_register(ctx, reg); + break; + ++ case VKD3DSPR_OUTPOINTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_FORKINSTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_JOININSTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_INCONTROLPOINT: + vsir_validate_io_register(ctx, reg); + break; +@@ -7213,6 +7283,38 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_io_register(ctx, reg); + break; + ++ case VKD3DSPR_TESSCOORD: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_THREADID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_THREADGROUPID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_LOCALTHREADID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_LOCALTHREADINDEX: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_COVERAGE: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_SAMPLEMASK: ++ 
vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_GSINSTID: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_DEPTHOUTGE: + vsir_validate_register_without_indices(ctx, reg); + break; +@@ -7221,10 +7323,22 @@ static void vsir_validate_register(struct validation_context *ctx, + vsir_validate_register_without_indices(ctx, reg); + break; + ++ case VKD3DSPR_OUTSTENCILREF: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + case VKD3DSPR_SSA: + vsir_validate_ssa_register(ctx, reg); + break; + ++ case VKD3DSPR_WAVELANECOUNT: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ ++ case VKD3DSPR_WAVELANEINDEX: ++ vsir_validate_register_without_indices(ctx, reg); ++ break; ++ + default: + break; + } +@@ -7469,9 +7583,11 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + const struct shader_signature *signature, enum vsir_signature_type signature_type, + unsigned int idx) + { ++ enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + const char *signature_type_name = signature_type_names[signature_type]; + const struct signature_element *element = &signature->elements[idx]; +- bool integer_type = false; ++ bool integer_type = false, is_outer = false; ++ unsigned int semantic_index_max = 0; + + if (element->register_count == 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, +@@ -7523,12 +7639,6 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_SAMPLE_INDEX: +- case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: +- case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: +- case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: +- case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: +- case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: +- case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: + case VKD3D_SHADER_SV_TARGET: + case VKD3D_SHADER_SV_DEPTH: + 
case VKD3D_SHADER_SV_COVERAGE: +@@ -7537,6 +7647,37 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + case VKD3D_SHADER_SV_STENCIL_REF: + break; + ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; ++ semantic_index_max = 4; ++ is_outer = true; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; ++ semantic_index_max = 2; ++ is_outer = false; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; ++ semantic_index_max = 3; ++ is_outer = true; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; ++ semantic_index_max = 1; ++ is_outer = false; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; ++ semantic_index_max = 2; ++ is_outer = true; ++ break; ++ + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x.", +@@ -7544,6 +7685,38 @@ static void vsir_validate_signature_element(struct validation_context *ctx, + break; + } + ++ if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ { ++ if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: System value semantic %#x is only valid " ++ "in the patch constant signature.", ++ idx, signature_type_name, element->sysval_semantic); ++ ++ if (ctx->program->tess_domain != expected_tess_domain) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", ++ idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); ++ ++ if 
(element->semantic_index >= semantic_index_max) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", ++ idx, signature_type_name, element->semantic_index, element->sysval_semantic); ++ } ++ else ++ { ++ unsigned int *idx_pos = &(is_outer ? ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; ++ ++ if (*idx_pos != ~0u) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", ++ idx, signature_type_name, element->semantic_index, element->sysval_semantic); ++ else ++ *idx_pos = idx; ++ } ++ } ++ + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) + { + const struct sysval_validation_data_element *data = &sysval_validation_data[element->sysval_semantic]; +@@ -7643,6 +7816,109 @@ static void vsir_validate_signature(struct validation_context *ctx, + + for (i = 0; i < signature->element_count; ++i) + vsir_validate_signature_element(ctx, signature, signature_type, i); ++ ++ if (signature_type == SIGNATURE_TYPE_PATCH_CONSTANT) ++ { ++ const struct signature_element *first_element, *element; ++ unsigned int expected_outer_count = 0; ++ unsigned int expected_inner_count = 0; ++ ++ switch (ctx->program->tess_domain) ++ { ++ case VKD3D_TESSELLATOR_DOMAIN_QUAD: ++ expected_outer_count = 4; ++ expected_inner_count = 2; ++ break; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: ++ expected_outer_count = 3; ++ expected_inner_count = 1; ++ break; ++ ++ case VKD3D_TESSELLATOR_DOMAIN_LINE: ++ expected_outer_count = 2; ++ expected_inner_count = 0; ++ break; ++ ++ default: ++ break; ++ } ++ ++ /* After I/O normalisation tessellation factors are merged in a single array. 
*/ ++ if (ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) ++ { ++ expected_outer_count = min(1, expected_outer_count); ++ expected_inner_count = min(1, expected_inner_count); ++ } ++ ++ first_element = NULL; ++ for (i = 0; i < expected_outer_count; ++i) ++ { ++ if (ctx->outer_tess_idxs[i] == ~0u) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Missing outer system value semantic %u.", i); ++ } ++ else ++ { ++ element = &signature->elements[ctx->outer_tess_idxs[i]]; ++ ++ if (!first_element) ++ { ++ first_element = element; ++ continue; ++ } ++ ++ if (element->register_index != first_element->register_index + i) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid register index %u for outer system value semantic %u, expected %u.", ++ element->register_index, i, first_element->register_index + i); ++ } ++ ++ if (element->mask != first_element->mask) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid mask %#x for outer system value semantic %u, expected %#x.", ++ element->mask, i, first_element->mask); ++ } ++ } ++ } ++ ++ first_element = NULL; ++ for (i = 0; i < expected_inner_count; ++i) ++ { ++ if (ctx->inner_tess_idxs[i] == ~0u) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "Missing inner system value semantic %u.", i); ++ } ++ else ++ { ++ element = &signature->elements[ctx->inner_tess_idxs[i]]; ++ ++ if (!first_element) ++ { ++ first_element = element; ++ continue; ++ } ++ ++ if (element->register_index != first_element->register_index + i) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, ++ "Invalid register index %u for inner system value semantic %u, expected %u.", ++ element->register_index, i, first_element->register_index + i); ++ } ++ ++ if (element->mask != first_element->mask) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, ++ "Invalid mask %#x for inner system value semantic 
%u, expected %#x.", ++ element->mask, i, first_element->mask); ++ } ++ } ++ } ++ } + } + + static const char *name_from_cf_type(enum vsir_control_flow_type type) +@@ -7754,6 +8030,39 @@ static void vsir_validate_dcl_hs_max_tessfactor(struct validation_context *ctx, + instruction->declaration.max_tessellation_factor); + } + ++static void vsir_validate_dcl_input(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.dst.reg.type) ++ { ++ /* Signature input registers. */ ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_INCONTROLPOINT: ++ case VKD3DSPR_OUTCONTROLPOINT: ++ case VKD3DSPR_PATCHCONST: ++ /* Non-signature input registers. */ ++ case VKD3DSPR_PRIMID: ++ case VKD3DSPR_FORKINSTID: ++ case VKD3DSPR_JOININSTID: ++ case VKD3DSPR_THREADID: ++ case VKD3DSPR_THREADGROUPID: ++ case VKD3DSPR_LOCALTHREADID: ++ case VKD3DSPR_LOCALTHREADINDEX: ++ case VKD3DSPR_COVERAGE: ++ case VKD3DSPR_TESSCOORD: ++ case VKD3DSPR_OUTPOINTID: ++ case VKD3DSPR_GSINSTID: ++ case VKD3DSPR_WAVELANECOUNT: ++ case VKD3DSPR_WAVELANEINDEX: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT.", ++ instruction->declaration.dst.reg.type); ++ } ++} ++ + static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -7763,6 +8072,105 @@ static void vsir_validate_dcl_input_primitive(struct validation_context *ctx, + instruction->declaration.primitive_type.type); + } + ++static void vsir_validate_dcl_input_ps(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.dst.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_PS.", ++ 
instruction->declaration.dst.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_ps_sgv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_PS_SGV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_ps_siv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_PS_SIV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_sgv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_SGV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_input_siv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_INPUT: ++ case VKD3DSPR_PATCHCONST: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_INPUT_SIV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ ++static void vsir_validate_dcl_output(struct 
validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.dst.reg.type) ++ { ++ /* Signature output registers. */ ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_PATCHCONST: ++ /* Non-signature output registers. */ ++ case VKD3DSPR_DEPTHOUT: ++ case VKD3DSPR_SAMPLEMASK: ++ case VKD3DSPR_DEPTHOUTGE: ++ case VKD3DSPR_DEPTHOUTLE: ++ case VKD3DSPR_OUTSTENCILREF: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_OUTPUT.", ++ instruction->declaration.dst.reg.type); ++ } ++} ++ + static void vsir_validate_dcl_output_control_point_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -7772,6 +8180,22 @@ static void vsir_validate_dcl_output_control_point_count(struct validation_conte + instruction->declaration.count); + } + ++static void vsir_validate_dcl_output_siv(struct validation_context *ctx, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ switch (instruction->declaration.register_semantic.reg.reg.type) ++ { ++ case VKD3DSPR_OUTPUT: ++ case VKD3DSPR_PATCHCONST: ++ break; ++ ++ default: ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, ++ "Invalid register type %#x in instruction DCL_OUTPUT_SIV.", ++ instruction->declaration.register_semantic.reg.reg.type); ++ } ++} ++ + static void vsir_validate_dcl_output_topology(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction) + { +@@ -7801,6 +8225,11 @@ static void vsir_validate_dcl_tessellator_domain(struct validation_context *ctx, + || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); ++ ++ if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) ++ 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", ++ instruction->declaration.tessellator_domain, ctx->program->tess_domain); + } + + static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, +@@ -8063,8 +8492,16 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ + [VKD3DSIH_HS_JOIN_PHASE] = {0, 0, vsir_validate_hull_shader_phase}, + [VKD3DSIH_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, + [VKD3DSIH_DCL_HS_MAX_TESSFACTOR] = {0, 0, vsir_validate_dcl_hs_max_tessfactor}, ++ [VKD3DSIH_DCL_INPUT] = {0, 0, vsir_validate_dcl_input}, + [VKD3DSIH_DCL_INPUT_PRIMITIVE] = {0, 0, vsir_validate_dcl_input_primitive}, ++ [VKD3DSIH_DCL_INPUT_PS] = {0, 0, vsir_validate_dcl_input_ps}, ++ [VKD3DSIH_DCL_INPUT_PS_SGV] = {0, 0, vsir_validate_dcl_input_ps_sgv}, ++ [VKD3DSIH_DCL_INPUT_PS_SIV] = {0, 0, vsir_validate_dcl_input_ps_siv}, ++ [VKD3DSIH_DCL_INPUT_SGV] = {0, 0, vsir_validate_dcl_input_sgv}, ++ [VKD3DSIH_DCL_INPUT_SIV] = {0, 0, vsir_validate_dcl_input_siv}, ++ [VKD3DSIH_DCL_OUTPUT] = {0, 0, vsir_validate_dcl_output}, + [VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT] = {0, 0, vsir_validate_dcl_output_control_point_count}, ++ [VKD3DSIH_DCL_OUTPUT_SIV] = {0, 0, vsir_validate_dcl_output_siv}, + [VKD3DSIH_DCL_OUTPUT_TOPOLOGY] = {0, 0, vsir_validate_dcl_output_topology}, + [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps}, + [VKD3DSIH_DCL_TESSELLATOR_DOMAIN] = {0, 0, vsir_validate_dcl_tessellator_domain}, +@@ -8177,6 +8614,12 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + .status = VKD3D_OK, + .phase = VKD3DSIH_INVALID, + .invalid_instruction_idx = true, ++ .outer_tess_idxs[0] = ~0u, ++ .outer_tess_idxs[1] = ~0u, ++ .outer_tess_idxs[2] = ~0u, ++ .outer_tess_idxs[3] = ~0u, ++ .inner_tess_idxs[0] = ~0u, ++ .inner_tess_idxs[1] = ~0u, + }; + unsigned int i; + +@@ 
-8187,12 +8630,20 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: ++ if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID ++ || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Invalid tessellation domain %#x.", program->tess_domain); + break; + + default: + if (program->patch_constant_signature.element_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Patch constant signature is only valid for hull and domain shaders."); ++ ++ if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Invalid tessellation domain %#x.", program->tess_domain); + } + + switch (program->shader_version.type) +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index df3edeaa4e6..29f51088728 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -153,6 +153,64 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + ++ case VKD3DSPR_IMMCONST: ++ switch (reg->dimension) ++ { ++ case VSIR_DIMENSION_SCALAR: ++ switch (reg->data_type) ++ { ++ case VKD3D_DATA_INT: ++ vkd3d_string_buffer_printf(buffer, "as_type<int>(%#xu)", reg->u.immconst_u32[0]); ++ break; ++ case VKD3D_DATA_UINT: ++ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); ++ break; ++ case VKD3D_DATA_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "as_type<float>(%#xu)", reg->u.immconst_u32[0]); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "", reg->data_type); ++ break; ++ } ++ break; ++ ++ case VSIR_DIMENSION_VEC4: 
++ switch (reg->data_type) ++ { ++ case VKD3D_DATA_INT: ++ vkd3d_string_buffer_printf(buffer, "as_type<int4>(uint4(%#xu, %#xu, %#xu, %#xu))", ++ reg->u.immconst_u32[0], reg->u.immconst_u32[1], ++ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); ++ break; ++ case VKD3D_DATA_UINT: ++ vkd3d_string_buffer_printf(buffer, "uint4(%#xu, %#xu, %#xu, %#xu)", ++ reg->u.immconst_u32[0], reg->u.immconst_u32[1], ++ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); ++ vkd3d_string_buffer_printf(buffer, "%#xu", reg->u.immconst_u32[0]); ++ break; ++ case VKD3D_DATA_FLOAT: ++ vkd3d_string_buffer_printf(buffer, "as_type<float4>(uint4(%#xu, %#xu, %#xu, %#xu))", ++ reg->u.immconst_u32[0], reg->u.immconst_u32[1], ++ reg->u.immconst_u32[2], reg->u.immconst_u32[3]); ++ break; ++ default: ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled immconst datatype %#x.", reg->data_type); ++ vkd3d_string_buffer_printf(buffer, "", reg->data_type); ++ break; ++ } ++ break; ++ ++ default: ++ vkd3d_string_buffer_printf(buffer, "", reg->dimension); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled dimension %#x.", reg->dimension); ++ break; ++ } ++ break; ++ + case VKD3DSPR_CONSTBUFFER: + if (reg->idx_count != 3) + { +@@ -215,19 +273,43 @@ static void msl_src_init(struct msl_src *msl_src, struct msl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) + { + const struct vkd3d_shader_register *reg = &vsir_src->reg; ++ struct vkd3d_string_buffer *str; + + msl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + + if (reg->non_uniform) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); +- if (vsir_src->modifiers) +- msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +- "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + +- msl_print_register_name(msl_src->str, gen, reg); ++ if 
(!vsir_src->modifiers) ++ str = msl_src->str; ++ else ++ str = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ msl_print_register_name(str, gen, reg); + if (reg->dimension == VSIR_DIMENSION_VEC4) +- msl_print_swizzle(msl_src->str, vsir_src->swizzle, mask); ++ msl_print_swizzle(str, vsir_src->swizzle, mask); ++ ++ switch (vsir_src->modifiers) ++ { ++ case VKD3DSPSM_NONE: ++ break; ++ case VKD3DSPSM_NEG: ++ vkd3d_string_buffer_printf(msl_src->str, "-%s", str->buffer); ++ break; ++ case VKD3DSPSM_ABS: ++ vkd3d_string_buffer_printf(msl_src->str, "abs(%s)", str->buffer); ++ break; ++ default: ++ vkd3d_string_buffer_printf(msl_src->str, "(%s)", ++ vsir_src->modifiers, str->buffer); ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); ++ break; ++ } ++ ++ if (str != msl_src->str) ++ vkd3d_string_buffer_release(&gen->string_buffers, str); + } + + static void msl_dst_cleanup(struct msl_dst *dst, struct vkd3d_string_buffer_cache *cache) +@@ -288,6 +370,80 @@ static void msl_unhandled(struct msl_generator *gen, const struct vkd3d_shader_i + "Internal compiler error: Unhandled instruction %#x.", ins->opcode); + } + ++static void msl_binop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ struct msl_src src[2]; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], mask); ++ msl_src_init(&src[1], gen, &ins->src[1], mask); ++ ++ msl_print_assignment(gen, &dst, "%s %s %s", src[0].str->buffer, op, src[1].str->buffer); ++ ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_intrinsic(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ struct msl_src src; ++ struct msl_dst dst; ++ uint32_t mask; 
++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src, gen, &ins->src[0], mask); ++ ++ msl_print_assignment(gen, &dst, "%s(%s)", op, src.str->buffer); ++ ++ msl_src_cleanup(&src, &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_relop(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *op) ++{ ++ unsigned int mask_size; ++ struct msl_src src[2]; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], mask); ++ msl_src_init(&src[1], gen, &ins->src[1], mask); ++ ++ if ((mask_size = vsir_write_mask_component_count(mask)) > 1) ++ msl_print_assignment(gen, &dst, "select(uint%u(0u), uint%u(0xffffffffu), bool%u(%s %s %s))", ++ mask_size, mask_size, mask_size, src[0].str->buffer, op, src[1].str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "%s %s %s ? 0xffffffffu : 0u", ++ src[0].str->buffer, op, src[1].str->buffer); ++ ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void msl_cast(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins, const char *constructor) ++{ ++ unsigned int component_count; ++ struct msl_src src; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src, gen, &ins->src[0], mask); ++ ++ if ((component_count = vsir_write_mask_component_count(mask)) > 1) ++ msl_print_assignment(gen, &dst, "%s%u(%s)", constructor, component_count, src.str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "%s(%s)", constructor, src.str->buffer); ++ ++ msl_src_cleanup(&src, &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ + static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + struct msl_src src; +@@ -303,6 +459,31 @@ 
static void msl_mov(struct msl_generator *gen, const struct vkd3d_shader_instruc + msl_dst_cleanup(&dst, &gen->string_buffers); + } + ++static void msl_movc(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ unsigned int component_count; ++ struct msl_src src[3]; ++ struct msl_dst dst; ++ uint32_t mask; ++ ++ mask = msl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ msl_src_init(&src[0], gen, &ins->src[0], mask); ++ msl_src_init(&src[1], gen, &ins->src[1], mask); ++ msl_src_init(&src[2], gen, &ins->src[2], mask); ++ ++ if ((component_count = vsir_write_mask_component_count(mask)) > 1) ++ msl_print_assignment(gen, &dst, "select(%s, %s, bool%u(%s))", ++ src[2].str->buffer, src[1].str->buffer, component_count, src[0].str->buffer); ++ else ++ msl_print_assignment(gen, &dst, "select(%s, %s, bool(%s))", ++ src[2].str->buffer, src[1].str->buffer, src[0].str->buffer); ++ ++ msl_src_cleanup(&src[2], &gen->string_buffers); ++ msl_src_cleanup(&src[1], &gen->string_buffers); ++ msl_src_cleanup(&src[0], &gen->string_buffers); ++ msl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ + static void msl_ret(struct msl_generator *gen, const struct vkd3d_shader_instruction *ins) + { + msl_print_indent(gen->buffer, gen->indent); +@@ -315,17 +496,61 @@ static void msl_handle_instruction(struct msl_generator *gen, const struct vkd3d + + switch (ins->opcode) + { ++ case VKD3DSIH_ADD: ++ msl_binop(gen, ins, "+"); ++ break; ++ case VKD3DSIH_AND: ++ msl_binop(gen, ins, "&"); ++ break; + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: + case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_NOP: + break; ++ case VKD3DSIH_DIV: ++ msl_binop(gen, ins, "/"); ++ break; ++ case VKD3DSIH_FRC: ++ msl_intrinsic(gen, ins, "fract"); ++ break; ++ case VKD3DSIH_FTOI: ++ msl_cast(gen, ins, "int"); ++ break; ++ case VKD3DSIH_FTOU: ++ msl_cast(gen, ins, "uint"); ++ break; ++ case VKD3DSIH_GEO: ++ msl_relop(gen, ins, ">="); ++ break; ++ case VKD3DSIH_INE: ++ case VKD3DSIH_NEU: ++ 
msl_relop(gen, ins, "!="); ++ break; ++ case VKD3DSIH_ITOF: ++ case VKD3DSIH_UTOF: ++ msl_cast(gen, ins, "float"); ++ break; + case VKD3DSIH_MOV: + msl_mov(gen, ins); + break; ++ case VKD3DSIH_MOVC: ++ msl_movc(gen, ins); ++ break; ++ case VKD3DSIH_MUL: ++ msl_binop(gen, ins, "*"); ++ break; ++ case VKD3DSIH_OR: ++ msl_binop(gen, ins, "|"); ++ break; + case VKD3DSIH_RET: + msl_ret(gen, ins); + break; ++ case VKD3DSIH_ROUND_PI: ++ msl_intrinsic(gen, ins, "ceil"); ++ break; ++ case VKD3DSIH_ROUND_Z: ++ msl_intrinsic(gen, ins, "trunc"); ++ break; + default: + msl_unhandled(gen, ins); + break; +@@ -790,6 +1015,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader + MESSAGE("Generating a MSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); ++ vkd3d_string_buffer_printf(gen->buffer, "#include \n\n"); ++ vkd3d_string_buffer_printf(gen->buffer, "using namespace metal;\n\n"); + + if (gen->program->global_flags) + msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index 4a8d0fddae1..d167415c356 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -20,6 +20,7 @@ + + %{ + ++#include "preproc.h" + #include "preproc.tab.h" + + #undef ERROR /* defined in wingdi.h */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index bdfd632ad12..005b40a9d1f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -2471,7 +2471,6 @@ struct spirv_compiler + bool emit_point_size; + + enum vkd3d_shader_opcode phase; +- bool emit_default_control_point_phase; + struct vkd3d_shader_phase control_point_phase; + struct vkd3d_shader_phase patch_constant_phase; + +@@ -6822,15 +6821,11 @@ static void 
spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler + compiler->spirv_builder.invocation_count = instruction->declaration.count; + } + +-static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, ++ enum vkd3d_tessellator_domain domain) + { +- enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; + SpvExecutionMode mode; + +- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) +- return; +- + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: +@@ -6916,15 +6911,10 @@ static void spirv_compiler_emit_thread_group_size(struct spirv_compiler *compile + SpvExecutionModeLocalSize, local_size, ARRAY_SIZE(local_size)); + } + +-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler); +- + static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + +- if (is_in_control_point_phase(compiler) && compiler->emit_default_control_point_phase) +- spirv_compiler_emit_default_control_point_phase(compiler); +- + vkd3d_spirv_build_op_function_end(builder); + + if (is_in_control_point_phase(compiler)) +@@ -6969,9 +6959,6 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + phase->function_id = function_id; + /* The insertion location must be set after the label is emitted. 
*/ + phase->function_location = 0; +- +- if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) +- compiler->emit_default_control_point_phase = instruction->flags; + } + + static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) +@@ -7000,63 +6987,6 @@ static void spirv_compiler_initialise_block(struct spirv_compiler *compiler) + } + } + +-static void spirv_compiler_emit_default_control_point_phase(struct spirv_compiler *compiler) +-{ +- const struct shader_signature *output_signature = &compiler->output_signature; +- const struct shader_signature *input_signature = &compiler->input_signature; +- uint32_t type_id, output_ptr_type_id, input_id, dst_id, invocation_id; +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- enum vkd3d_shader_component_type component_type; +- struct vkd3d_shader_src_param invocation; +- struct vkd3d_shader_register input_reg; +- unsigned int component_count; +- unsigned int i; +- +- vkd3d_spirv_build_op_label(builder, vkd3d_spirv_alloc_id(builder)); +- spirv_compiler_initialise_block(compiler); +- invocation_id = spirv_compiler_emit_load_invocation_id(compiler); +- +- memset(&invocation, 0, sizeof(invocation)); +- vsir_register_init(&invocation.reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_INT, 0); +- invocation.swizzle = VKD3D_SHADER_NO_SWIZZLE; +- +- vsir_register_init(&input_reg, VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 2); +- input_reg.idx[0].offset = 0; +- input_reg.idx[0].rel_addr = &invocation; +- input_reg.idx[1].offset = 0; +- input_id = spirv_compiler_get_register_id(compiler, &input_reg); +- +- VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); +- for (i = 0; i < output_signature->element_count; ++i) +- { +- const struct signature_element *output = &output_signature->elements[i]; +- const struct signature_element *input = &input_signature->elements[i]; +- struct vkd3d_shader_register_info output_reg_info; +- struct vkd3d_shader_register output_reg; +- +- 
VKD3D_ASSERT(input->mask == output->mask); +- VKD3D_ASSERT(input->component_type == output->component_type); +- +- input_reg.idx[1].offset = i; +- input_id = spirv_compiler_get_register_id(compiler, &input_reg); +- +- vsir_register_init(&output_reg, VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); +- output_reg.idx[0].offset = i; +- spirv_compiler_get_register_info(compiler, &output_reg, &output_reg_info); +- +- component_type = output->component_type; +- component_count = vsir_write_mask_component_count(output->mask); +- type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); +- output_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassOutput, type_id); +- +- dst_id = vkd3d_spirv_build_op_access_chain1(builder, output_ptr_type_id, output_reg_info.id, invocation_id); +- +- vkd3d_spirv_build_op_copy_memory(builder, dst_id, input_id, SpvMemoryAccessMaskNone); +- } +- +- vkd3d_spirv_build_op_return(builder); +-} +- + static void spirv_compiler_emit_barrier(struct spirv_compiler *compiler, + SpvScope execution_scope, SpvScope memory_scope, SpvMemorySemanticsMask semantics) + { +@@ -10239,9 +10169,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + spirv_compiler_emit_output_vertex_count(compiler, instruction); + break; +- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: +- spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); +- break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + spirv_compiler_emit_tessellator_output_primitive(compiler, + instruction->declaration.tessellator_output_primitive); +@@ -10743,6 +10670,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compiler->input_control_point_count = program->input_control_point_count; + compiler->output_control_point_count = program->output_control_point_count; + ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && 
!spirv_compiler_is_opengl_target(compiler)) ++ spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); ++ + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 9c41e2c2053..409328b2e53 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -707,6 +707,7 @@ struct vkd3d_sm4_opcode_info + char src_info[SM4_MAX_SRC_COUNT]; + void (*read_opcode_func)(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, + const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv); ++ bool is_conditional_op; + }; + + static const enum vkd3d_primitive_type output_primitive_type_table[] = +@@ -1268,6 +1269,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi + { + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; ++ priv->p.program->tess_domain = ins->declaration.tessellator_domain; + } + + static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1439,18 +1441,18 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_OP_AND, VKD3DSIH_AND, "u", "uu"}, + {VKD3D_SM4_OP_BREAK, VKD3DSIH_BREAK, "", ""}, + {VKD3D_SM4_OP_BREAKC, VKD3DSIH_BREAKP, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_CASE, VKD3DSIH_CASE, "", "u", + shader_sm4_read_case_condition}, + {VKD3D_SM4_OP_CONTINUE, VKD3DSIH_CONTINUE, "", ""}, + {VKD3D_SM4_OP_CONTINUEC, VKD3DSIH_CONTINUEP, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_CUT, VKD3DSIH_CUT, "", ""}, + {VKD3D_SM4_OP_DEFAULT, VKD3DSIH_DEFAULT, "", ""}, + {VKD3D_SM4_OP_DERIV_RTX, VKD3DSIH_DSX, "f", "f"}, + 
{VKD3D_SM4_OP_DERIV_RTY, VKD3DSIH_DSY, "f", "f"}, + {VKD3D_SM4_OP_DISCARD, VKD3DSIH_DISCARD, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_DIV, VKD3DSIH_DIV, "f", "ff"}, + {VKD3D_SM4_OP_DP2, VKD3DSIH_DP2, "f", "ff"}, + {VKD3D_SM4_OP_DP3, VKD3DSIH_DP3, "f", "ff"}, +@@ -1468,7 +1470,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_OP_GE, VKD3DSIH_GEO, "u", "ff"}, + {VKD3D_SM4_OP_IADD, VKD3DSIH_IADD, "i", "ii"}, + {VKD3D_SM4_OP_IF, VKD3DSIH_IF, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_IEQ, VKD3DSIH_IEQ, "u", "ii"}, + {VKD3D_SM4_OP_IGE, VKD3DSIH_IGE, "u", "ii"}, + {VKD3D_SM4_OP_ILT, VKD3DSIH_ILT, "u", "ii"}, +@@ -1502,7 +1504,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_OP_RESINFO, VKD3DSIH_RESINFO, "f", "i*"}, + {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, + {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", +- shader_sm4_read_conditional_op}, ++ shader_sm4_read_conditional_op, true}, + {VKD3D_SM4_OP_ROUND_NE, VKD3DSIH_ROUND_NE, "f", "f"}, + {VKD3D_SM4_OP_ROUND_NI, VKD3DSIH_ROUND_NI, "f", "f"}, + {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, +@@ -1967,16 +1969,6 @@ static const struct vkd3d_sm4_register_type_info *get_info_from_vkd3d_register_t + return lookup->register_type_info_from_vkd3d[vkd3d_type]; + } + +-static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( +- const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_register_type vkd3d_type) +-{ +- const struct vkd3d_sm4_register_type_info *register_type_info = +- get_info_from_vkd3d_register_type(lookup, vkd3d_type); +- +- VKD3D_ASSERT(register_type_info); +- return register_type_info->default_src_swizzle_type; +-} +- + static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode) + { +@@ -2993,8 
+2985,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + return VKD3D_OK; + } + +-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); +- + bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) + { +@@ -3331,6 +3321,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -4096,297 +4087,6 @@ struct sm4_instruction + unsigned int idx_src_count; + }; + +-static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, +- const struct hlsl_ir_node *instr) +-{ +- VKD3D_ASSERT(instr->reg.allocated); +- reg->type = VKD3DSPR_TEMP; +- reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = instr->reg.id; +- reg->idx_count = 1; +- *writemask = instr->reg.writemask; +-} +- +-static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_register *reg, +- enum vkd3d_shader_register_type type, uint32_t *writemask, const struct hlsl_deref *deref, +- struct sm4_instruction *sm4_instr) +-{ +- const struct hlsl_ir_var *var = deref->var; +- unsigned int offset_const_deref; +- +- reg->type = type; +- reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; +- reg->dimension = VSIR_DIMENSION_VEC4; +- +- VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); +- +- if (!var->indexable) +- { +- offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx[0].offset += offset_const_deref / 4; +- reg->idx_count = 1; +- } +- else +- { +- offset_const_deref = deref->const_offset; +- reg->idx[1].offset = offset_const_deref / 4; +- reg->idx_count = 2; +- +- if (deref->rel_offset.node) +- { +- struct vkd3d_shader_src_param *idx_src; +- unsigned 
int idx_writemask; +- +- VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); +- idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; +- memset(idx_src, 0, sizeof(*idx_src)); +- +- reg->idx[1].rel_addr = idx_src; +- sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); +- VKD3D_ASSERT(idx_writemask != 0); +- idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); +- } +- } +- +- *writemask = 0xf & (0xf << (offset_const_deref % 4)); +- if (var->regs[HLSL_REGSET_NUMERIC].writemask) +- *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); +-} +- +-static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_register *reg, +- uint32_t *writemask, const struct hlsl_deref *deref, struct sm4_instruction *sm4_instr) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- const struct hlsl_type *data_type = hlsl_deref_get_type(tpf->ctx, deref); +- const struct hlsl_ir_var *var = deref->var; +- struct hlsl_ctx *ctx = tpf->ctx; +- +- if (var->is_uniform) +- { +- enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); +- +- if (regset == HLSL_REGSET_TEXTURES) +- { +- reg->type = VKD3DSPR_RESOURCE; +- reg->dimension = VSIR_DIMENSION_VEC4; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ +- reg->idx_count = 2; +- } +- else +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx_count = 1; +- } +- VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else if (regset == HLSL_REGSET_UAVS) +- { +- reg->type = VKD3DSPR_UAV; +- reg->dimension = VSIR_DIMENSION_VEC4; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = 
var->regs[HLSL_REGSET_UAVS].id; +- reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ +- reg->idx_count = 2; +- } +- else +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx_count = 1; +- } +- VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else if (regset == HLSL_REGSET_SAMPLERS) +- { +- reg->type = VKD3DSPR_SAMPLER; +- reg->dimension = VSIR_DIMENSION_NONE; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ +- reg->idx_count = 2; +- } +- else +- { +- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- reg->idx_count = 1; +- } +- VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); +- *writemask = VKD3DSP_WRITEMASK_ALL; +- } +- else +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; +- +- VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); +- reg->type = VKD3DSPR_CONSTBUFFER; +- reg->dimension = VSIR_DIMENSION_VEC4; +- if (vkd3d_shader_ver_ge(version, 5, 1)) +- { +- reg->idx[0].offset = var->buffer->reg.id; +- reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ +- reg->idx[2].offset = offset / 4; +- reg->idx_count = 3; +- } +- else +- { +- reg->idx[0].offset = var->buffer->reg.index; +- reg->idx[1].offset = offset / 4; +- reg->idx_count = 2; +- } +- *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); +- } +- } +- else if (var->is_input_semantic) +- { +- bool has_idx; +- +- if (sm4_register_from_semantic_name(version, var->semantic.name, false, &reg->type, &has_idx)) +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); +- +- if (has_idx) +- { +- reg->idx[0].offset = var->semantic.index + offset / 4; +- 
reg->idx_count = 1; +- } +- +- if (shader_sm4_is_scalar_register(reg)) +- reg->dimension = VSIR_DIMENSION_SCALAR; +- else +- reg->dimension = VSIR_DIMENSION_VEC4; +- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- VKD3D_ASSERT(hlsl_reg.allocated); +- +- if (version->type == VKD3D_SHADER_TYPE_DOMAIN) +- reg->type = VKD3DSPR_PATCHCONST; +- else +- reg->type = VKD3DSPR_INPUT; +- reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +- } +- else if (var->is_output_semantic) +- { +- bool has_idx; +- +- if (sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) +- { +- unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); +- +- if (has_idx) +- { +- reg->idx[0].offset = var->semantic.index + offset / 4; +- reg->idx_count = 1; +- } +- +- if (shader_sm4_is_scalar_register(reg)) +- reg->dimension = VSIR_DIMENSION_SCALAR; +- else +- reg->dimension = VSIR_DIMENSION_VEC4; +- *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); +- } +- else +- { +- struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); +- +- VKD3D_ASSERT(hlsl_reg.allocated); +- reg->type = VKD3DSPR_OUTPUT; +- reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = hlsl_reg.id; +- reg->idx_count = 1; +- *writemask = hlsl_reg.writemask; +- } +- } +- else +- { +- enum vkd3d_shader_register_type type = deref->var->indexable ? 
VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; +- +- sm4_numeric_register_from_deref(ctx, reg, type, writemask, deref, sm4_instr); +- } +-} +- +-static void sm4_src_from_deref(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, +- const struct hlsl_deref *deref, unsigned int map_writemask, struct sm4_instruction *sm4_instr) +-{ +- unsigned int hlsl_swizzle; +- uint32_t writemask; +- +- sm4_register_from_deref(tpf, &src->reg, &writemask, deref, sm4_instr); +- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) +- { +- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +- src->swizzle = swizzle_from_sm4(hlsl_swizzle); +- } +-} +- +-static void sm4_dst_from_node(struct vkd3d_shader_dst_param *dst, const struct hlsl_ir_node *instr) +-{ +- sm4_register_from_node(&dst->reg, &dst->write_mask, instr); +-} +- +-static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, +- const struct hlsl_constant_value *value, unsigned int width, unsigned int map_writemask) +-{ +- src->swizzle = 0; +- src->reg.type = VKD3DSPR_IMMCONST; +- if (width == 1) +- { +- src->reg.dimension = VSIR_DIMENSION_SCALAR; +- src->reg.u.immconst_u32[0] = value->u[0].u; +- } +- else +- { +- unsigned int i, j = 0; +- +- src->reg.dimension = VSIR_DIMENSION_VEC4; +- for (i = 0; i < 4; ++i) +- { +- if ((map_writemask & (1u << i)) && (j < width)) +- src->reg.u.immconst_u32[i] = value->u[j++].u; +- else +- src->reg.u.immconst_u32[i] = 0; +- } +- } +-} +- +-static void sm4_src_from_node(const struct tpf_compiler *tpf, struct vkd3d_shader_src_param *src, +- const struct hlsl_ir_node *instr, uint32_t map_writemask) +-{ +- unsigned int hlsl_swizzle; +- uint32_t writemask; +- +- if (instr->type == HLSL_IR_CONSTANT) +- { +- struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); +- +- sm4_src_from_constant_value(src, &constant->value, instr->data_type->dimx, map_writemask); +- return; +- } +- +- 
sm4_register_from_node(&src->reg, &writemask, instr); +- if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) +- { +- hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); +- src->swizzle = swizzle_from_sm4(hlsl_swizzle); +- } +-} +- + static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_register *reg, + unsigned int i) + { +@@ -4650,33 +4350,6 @@ static void write_sm4_instruction(const struct tpf_compiler *tpf, const struct s + sm4_update_stat_counters(tpf, instr); + } + +-static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, +- const struct hlsl_ir_node *texel_offset) +-{ +- struct sm4_instruction_modifier modif; +- struct hlsl_ir_constant *offset; +- +- if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT) +- return false; +- offset = hlsl_ir_constant(texel_offset); +- +- modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI; +- modif.u.aoffimmi.u = offset->value.u[0].i; +- modif.u.aoffimmi.v = 0; +- modif.u.aoffimmi.w = 0; +- if (offset->node.data_type->dimx > 1) +- modif.u.aoffimmi.v = offset->value.u[1].i; +- if (offset->node.data_type->dimx > 2) +- modif.u.aoffimmi.w = offset->value.u[2].i; +- if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7 +- || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7 +- || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7) +- return false; +- +- instr->modifiers[instr->modifier_count++] = modif; +- return true; +-} +- + static void write_sm4_dcl_constant_buffer(const struct tpf_compiler *tpf, const struct hlsl_buffer *cbuffer) + { + size_t size = (cbuffer->used_size + 3) / 4; +@@ -4945,26 +4618,6 @@ static void tpf_write_hs_decls(const struct tpf_compiler *tpf) + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, +- }; +- +- 
write_sm4_instruction(tpf, &instr); +-} +- +-static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, +- }; +- +- write_sm4_instruction(tpf, &instr); +-} +- + static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) + { + struct sm4_instruction instr = +@@ -5022,594 +4675,9 @@ static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ret(const struct tpf_compiler *tpf) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_RET, +- }; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, +- const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, +- enum hlsl_sampler_dim dim) +-{ +- const struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, resource); +- bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE +- && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); +- bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; +- unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- if (uav) +- instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; +- else if (raw) +- instr.opcode = VKD3D_SM5_OP_LD_RAW; +- else +- instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- if (!uav) +- { +- /* Mipmap level is in the last component in the IR, but needs to be in the W +- * component in the instruction. */ +- unsigned int dim_count = hlsl_sampler_dim_count(dim); +- +- if (dim_count == 1) +- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3; +- if (dim_count == 2) +- coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3; +- } +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, coords_writemask); +- +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- +- instr.src_count = 2; +- +- if (multisampled) +- { +- if (sample_index->type == HLSL_IR_CONSTANT) +- { +- struct vkd3d_shader_register *reg = &instr.srcs[2].reg; +- struct hlsl_ir_constant *index; +- +- index = hlsl_ir_constant(sample_index); +- +- memset(&instr.srcs[2], 0, sizeof(instr.srcs[2])); +- reg->type = VKD3DSPR_IMMCONST; +- reg->dimension = VSIR_DIMENSION_SCALAR; +- reg->u.immconst_u32[0] = index->value.u[0].u; +- } +- else if (version->major == 4 && version->minor == 0) +- { +- hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); +- } +- else +- { +- sm4_src_from_node(tpf, &instr.srcs[2], sample_index, 0); +- } +- +- ++instr.src_count; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_sample(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; +- const struct hlsl_ir_node *coords = load->coords.node; +- const struct hlsl_deref *resource 
= &load->resource; +- const struct hlsl_deref *sampler = &load->sampler; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- switch (load->load_type) +- { +- case HLSL_RESOURCE_SAMPLE: +- instr.opcode = VKD3D_SM4_OP_SAMPLE; +- break; +- +- case HLSL_RESOURCE_SAMPLE_CMP: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_C; +- break; +- +- case HLSL_RESOURCE_SAMPLE_CMP_LZ: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_C_LZ; +- break; +- +- case HLSL_RESOURCE_SAMPLE_LOD: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_LOD; +- break; +- +- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_B; +- break; +- +- case HLSL_RESOURCE_SAMPLE_GRAD: +- instr.opcode = VKD3D_SM4_OP_SAMPLE_GRAD; +- break; +- +- default: +- vkd3d_unreachable(); +- } +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); +- instr.src_count = 3; +- +- if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD +- || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); +- ++instr.src_count; +- } +- else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); +- instr.src_count += 2; +- } +- else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP +- || load->load_type == 
HLSL_RESOURCE_SAMPLE_CMP_LZ) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); +- ++instr.src_count; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) +- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER +- || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- { +- hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); +- return; +- } +- +- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_RESINFO; +- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) +- instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, 
VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_IF, +- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, +- .src_count = 1, +- }; +- +- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); +- +- sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &iff->then_block); +- +- if (!list_empty(&iff->else_block.instrs)) +- { +- instr.opcode = VKD3D_SM4_OP_ELSE; +- instr.src_count = 0; +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &iff->else_block); +- } +- +- instr.opcode = VKD3D_SM4_OP_ENDIF; +- instr.src_count = 0; +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) +-{ +- struct sm4_instruction instr = {0}; +- +- switch (jump->type) +- { +- case HLSL_IR_JUMP_BREAK: +- instr.opcode = VKD3D_SM4_OP_BREAK; +- break; +- +- case HLSL_IR_JUMP_CONTINUE: +- instr.opcode = VKD3D_SM4_OP_CONTINUE; +- break; +- +- case HLSL_IR_JUMP_DISCARD_NZ: +- { +- instr.opcode = VKD3D_SM4_OP_DISCARD; +- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; +- +- memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); +- instr.src_count = 1; +- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); +- break; +- } +- +- case HLSL_IR_JUMP_RETURN: +- vkd3d_unreachable(); +- +- default: +- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); +- return; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-/* Does this variable's data come directly from the API user, rather than being +- * temporary or from a previous shader stage? +- * I.e. 
is it a uniform or VS input? */ +-static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) +-{ +- if (var->is_uniform) +- return true; +- +- return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; +-} +- +-static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- const struct hlsl_type *type = load->node.data_type; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- +- sm4_dst_from_node(&instr.dsts[0], &load->node); +- instr.dst_count = 1; +- +- VKD3D_ASSERT(hlsl_is_numeric_type(type)); +- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) +- { +- struct hlsl_constant_value value; +- +- /* Uniform bools can be specified as anything, but internal bools always +- * have 0 for false and ~0 for true. Normalize that here. */ +- +- instr.opcode = VKD3D_SM4_OP_MOVC; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); +- +- memset(&value, 0xff, sizeof(value)); +- sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); +- memset(&value, 0, sizeof(value)); +- sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); +- instr.src_count = 3; +- } +- else +- { +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); +- instr.src_count = 1; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) +-{ +- struct sm4_instruction instr = +- { +- .opcode = VKD3D_SM4_OP_LOOP, +- }; +- +- write_sm4_instruction(tpf, &instr); +- +- write_sm4_block(tpf, &loop->body); +- +- instr.opcode = VKD3D_SM4_OP_ENDLOOP; +- write_sm4_instruction(tpf, &instr); +-} +- +-static void 
write_sm4_gather(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, +- const struct hlsl_deref *resource, const struct hlsl_deref *sampler, +- const struct hlsl_ir_node *coords, uint32_t swizzle, const struct hlsl_ir_node *texel_offset) +-{ +- const struct vkd3d_shader_version *version = &tpf->program->shader_version; +- struct vkd3d_shader_src_param *src; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- +- instr.opcode = VKD3D_SM4_OP_GATHER4; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL); +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- if (!vkd3d_shader_ver_ge(version, 5, 0)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); +- return; +- } +- instr.opcode = VKD3D_SM5_OP_GATHER4_PO; +- sm4_src_from_node(tpf, &instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL); +- } +- } +- +- sm4_src_from_deref(tpf, &instr.srcs[instr.src_count++], resource, instr.dsts[0].write_mask, &instr); +- +- src = &instr.srcs[instr.src_count++]; +- sm4_src_from_deref(tpf, src, sampler, VKD3DSP_WRITEMASK_ALL, &instr); +- src->reg.dimension = VSIR_DIMENSION_VEC4; +- src->swizzle = swizzle; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_ir_node *texel_offset = load->texel_offset.node; +- const struct hlsl_ir_node *sample_index = load->sample_index.node; +- const struct hlsl_ir_node *coords = load->coords.node; +- +- if (load->sampler.var && !load->sampler.var->is_uniform) +- { +- hlsl_fixme(tpf->ctx, &load->node.loc, "Sample using non-uniform sampler variable."); +- return; +- } +- +- if 
(!load->resource.var->is_uniform) +- { +- hlsl_fixme(tpf->ctx, &load->node.loc, "Load from non-uniform resource variable."); +- return; +- } +- +- switch (load->load_type) +- { +- case HLSL_RESOURCE_LOAD: +- write_sm4_ld(tpf, &load->node, &load->resource, +- coords, sample_index, texel_offset, load->sampling_dim); +- break; +- +- case HLSL_RESOURCE_SAMPLE: +- case HLSL_RESOURCE_SAMPLE_CMP: +- case HLSL_RESOURCE_SAMPLE_CMP_LZ: +- case HLSL_RESOURCE_SAMPLE_LOD: +- case HLSL_RESOURCE_SAMPLE_LOD_BIAS: +- case HLSL_RESOURCE_SAMPLE_GRAD: +- /* Combined sample expressions were lowered. */ +- VKD3D_ASSERT(load->sampler.var); +- write_sm4_sample(tpf, load); +- break; +- +- case HLSL_RESOURCE_GATHER_RED: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(X, X, X, X), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_GREEN: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_BLUE: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z), texel_offset); +- break; +- +- case HLSL_RESOURCE_GATHER_ALPHA: +- write_sm4_gather(tpf, &load->node, &load->resource, &load->sampler, coords, +- VKD3D_SHADER_SWIZZLE(W, W, W, W), texel_offset); +- break; +- +- case HLSL_RESOURCE_SAMPLE_INFO: +- write_sm4_sampleinfo(tpf, load); +- break; +- +- case HLSL_RESOURCE_RESINFO: +- write_sm4_resinfo(tpf, load); +- break; +- +- case HLSL_RESOURCE_SAMPLE_PROJ: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) +-{ +- struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); +- struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; +- struct sm4_instruction instr; +- +- if (!store->resource.var->is_uniform) +- { +- 
hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); +- return; +- } +- +- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- { +- hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); +- return; +- } +- +- memset(&instr, 0, sizeof(instr)); +- +- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, &store->resource, &instr); +- instr.dst_count = 1; +- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) +- { +- instr.opcode = VKD3D_SM5_OP_STORE_RAW; +- instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx); +- } +- else +- { +- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; +- } +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) +-{ +- const struct hlsl_ir_node *rhs = store->rhs.node; +- struct sm4_instruction instr; +- uint32_t writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); +- instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) +-{ +- const struct hlsl_ir_node *selector = s->selector.node; +- struct hlsl_ir_switch_case *c; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SWITCH; +- +- sm4_src_from_node(tpf, &instr.srcs[0], selector, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 1; +- +- 
write_sm4_instruction(tpf, &instr); +- +- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) +- { +- memset(&instr, 0, sizeof(instr)); +- if (c->is_default) +- { +- instr.opcode = VKD3D_SM4_OP_DEFAULT; +- } +- else +- { +- struct hlsl_constant_value value = { .u[0].u = c->value }; +- +- instr.opcode = VKD3D_SM4_OP_CASE; +- sm4_src_from_constant_value(&instr.srcs[0], &value, 1, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 1; +- } +- +- write_sm4_instruction(tpf, &instr); +- write_sm4_block(tpf, &c->body); +- } +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_ENDSWITCH; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) +-{ +- unsigned int hlsl_swizzle; +- struct sm4_instruction instr; +- uint32_t writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); +- instr.dst_count = 1; +- +- sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); +- hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), +- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); +- instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- + static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { ++ struct sm4_instruction_modifier *modifier; + const struct vkd3d_sm4_opcode_info *info; + struct sm4_instruction instr = {0}; + unsigned int dst_count, src_count; +@@ -5655,6 +4723,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + for (unsigned int i = 0; i < ins->src_count; ++i) + instr.srcs[i] = ins->src[i]; + ++ if (ins->texel_offset.u || ins->texel_offset.v || ins->texel_offset.w) ++ { ++ VKD3D_ASSERT(instr.modifier_count < 
ARRAY_SIZE(instr.modifiers)); ++ modifier = &instr.modifiers[instr.modifier_count++]; ++ modifier->type = VKD3D_SM4_MODIFIER_AOFFIMMI; ++ modifier->u.aoffimmi.u = ins->texel_offset.u; ++ modifier->u.aoffimmi.v = ins->texel_offset.v; ++ modifier->u.aoffimmi.w = ins->texel_offset.w; ++ } ++ ++ if (info->is_conditional_op) ++ { ++ if (ins->flags == VKD3D_SHADER_CONDITIONAL_OP_NZ) ++ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; ++ } ++ + write_sm4_instruction(tpf, &instr); + } + +@@ -5704,6 +4788,11 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + + case VKD3DSIH_ADD: + case VKD3DSIH_AND: ++ case VKD3DSIH_BREAK: ++ case VKD3DSIH_CASE: ++ case VKD3DSIH_CONTINUE: ++ case VKD3DSIH_DEFAULT: ++ case VKD3DSIH_DISCARD: + case VKD3DSIH_DIV: + case VKD3DSIH_DP2: + case VKD3DSIH_DP3: +@@ -5714,6 +4803,10 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_DSY: + case VKD3DSIH_DSY_COARSE: + case VKD3DSIH_DSY_FINE: ++ case VKD3DSIH_ELSE: ++ case VKD3DSIH_ENDIF: ++ case VKD3DSIH_ENDLOOP: ++ case VKD3DSIH_ENDSWITCH: + case VKD3DSIH_EQO: + case VKD3DSIH_EXP: + case VKD3DSIH_F16TOF32: +@@ -5721,9 +4814,14 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_FRC: + case VKD3DSIH_FTOI: + case VKD3DSIH_FTOU: ++ case VKD3DSIH_GATHER4: ++ case VKD3DSIH_GATHER4_PO: + case VKD3DSIH_GEO: ++ case VKD3DSIH_HS_CONTROL_POINT_PHASE: ++ case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_IADD: + case VKD3DSIH_IEQ: ++ case VKD3DSIH_IF: + case VKD3DSIH_IGE: + case VKD3DSIH_ILT: + case VKD3DSIH_IMAD: +@@ -5735,7 +4833,12 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_ISHL: + case VKD3DSIH_ISHR: + case VKD3DSIH_ITOF: ++ case VKD3DSIH_LD: ++ case VKD3DSIH_LD2DMS: ++ case VKD3DSIH_LD_RAW: ++ case VKD3DSIH_LD_UAV_TYPED: + case VKD3DSIH_LOG: ++ case VKD3DSIH_LOOP: + case VKD3DSIH_LTO: + case VKD3DSIH_MAD: + case VKD3DSIH_MAX: +@@ 
-5747,14 +4850,25 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + case VKD3DSIH_NOT: + case VKD3DSIH_OR: + case VKD3DSIH_RCP: ++ case VKD3DSIH_RESINFO: ++ case VKD3DSIH_RET: + case VKD3DSIH_ROUND_NE: + case VKD3DSIH_ROUND_NI: + case VKD3DSIH_ROUND_PI: + case VKD3DSIH_ROUND_Z: + case VKD3DSIH_RSQ: ++ case VKD3DSIH_SAMPLE: ++ case VKD3DSIH_SAMPLE_B: ++ case VKD3DSIH_SAMPLE_C: ++ case VKD3DSIH_SAMPLE_C_LZ: ++ case VKD3DSIH_SAMPLE_GRAD: + case VKD3DSIH_SAMPLE_INFO: ++ case VKD3DSIH_SAMPLE_LOD: + case VKD3DSIH_SINCOS: + case VKD3DSIH_SQRT: ++ case VKD3DSIH_STORE_RAW: ++ case VKD3DSIH_STORE_UAV_TYPED: ++ case VKD3DSIH_SWITCH: + case VKD3DSIH_UDIV: + case VKD3DSIH_UGE: + case VKD3DSIH_ULT: +@@ -5772,90 +4886,15 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ + } + } + +-static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) ++static void tpf_write_program(struct tpf_compiler *tpf, const struct vsir_program *program) + { +- const struct hlsl_ir_node *instr; +- unsigned int vsir_instr_idx; +- +- LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +- { +- if (instr->data_type) +- { +- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) +- { +- hlsl_fixme(tpf->ctx, &instr->loc, "Class %#x should have been lowered or removed.", +- instr->data_type->class); +- break; +- } +- +- if (!instr->reg.allocated) +- { +- VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); +- continue; +- } +- } +- +- switch (instr->type) +- { +- case HLSL_IR_CALL: +- case HLSL_IR_CONSTANT: +- vkd3d_unreachable(); +- +- case HLSL_IR_IF: +- write_sm4_if(tpf, hlsl_ir_if(instr)); +- break; +- +- case HLSL_IR_JUMP: +- write_sm4_jump(tpf, hlsl_ir_jump(instr)); +- break; +- +- case HLSL_IR_LOAD: +- write_sm4_load(tpf, hlsl_ir_load(instr)); +- break; +- +- case HLSL_IR_RESOURCE_LOAD: +- write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); +- 
break; +- +- case HLSL_IR_RESOURCE_STORE: +- write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); +- break; +- +- case HLSL_IR_LOOP: +- write_sm4_loop(tpf, hlsl_ir_loop(instr)); +- break; +- +- case HLSL_IR_STORE: +- write_sm4_store(tpf, hlsl_ir_store(instr)); +- break; +- +- case HLSL_IR_SWITCH: +- write_sm4_switch(tpf, hlsl_ir_switch(instr)); +- break; +- +- case HLSL_IR_SWIZZLE: +- write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); +- break; +- +- case HLSL_IR_VSIR_INSTRUCTION_REF: +- vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; +- tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); +- } +- } +-} ++ unsigned int i; + +-static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) +-{ + if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) + tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); + +- write_sm4_block(tpf, &func->body); +- +- write_sm4_ret(tpf); ++ for (i = 0; i < program->instructions.count; ++i) ++ tpf_handle_instruction(tpf, &program->instructions.elements[i]); + } + + static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *entry_func) +@@ -5944,16 +4983,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec + write_sm4_dcl_textures(tpf, resource, true); + } + +- if (version->type == VKD3D_SHADER_TYPE_HULL) +- tpf_write_hs_control_point_phase(tpf); +- +- tpf_write_shader_function(tpf, entry_func); +- +- if (version->type == VKD3D_SHADER_TYPE_HULL) +- { +- tpf_write_hs_fork_phase(tpf); +- tpf_write_shader_function(tpf, ctx->patch_constant_func); +- } ++ tpf_write_program(tpf, tpf->program); + + set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t)); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index d751f2dc6bf..db61eec8f28 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -23,6 +23,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -805,6 +807,9 @@ struct vkd3d_shader_scan_context + + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + size_t combined_samplers_size; ++ ++ enum vkd3d_shader_tessellator_output_primitive output_primitive; ++ enum vkd3d_shader_tessellator_partitioning partitioning; + }; + + static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, +@@ -1262,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, + instruction->declaration.structured_resource.byte_stride, false, instruction->flags); + break; ++ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: ++ context->output_primitive = instruction->declaration.tessellator_output_primitive; ++ break; ++ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: ++ context->partitioning = instruction->declaration.tessellator_partitioning; ++ break; + case VKD3DSIH_IF: + case VKD3DSIH_IFC: + cf_info = vkd3d_shader_scan_push_cf_info(context); +@@ -1502,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) + { + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; ++ struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; + struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; +@@ -1530,6 +1542,8 @@ static int 
vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + descriptor_info1 = &local_descriptor_info1; + } + ++ tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); ++ + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, + descriptor_info1, combined_sampler_info, message_context); + +@@ -1573,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + if (!ret && descriptor_info) + ret = convert_descriptor_info(descriptor_info, descriptor_info1); + ++ if (!ret && tessellation_info) ++ { ++ tessellation_info->output_primitive = context.output_primitive; ++ tessellation_info->partitioning = context.partitioning; ++ } ++ + if (ret < 0) + { + if (combined_sampler_info) +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index be7c0b73a22..ad04972b3fb 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1431,6 +1431,7 @@ struct vsir_program + uint8_t diffuse_written_mask; + enum vsir_control_flow_type cf_type; + enum vsir_normalisation_level normalisation_level; ++ enum vkd3d_tessellator_domain tess_domain; + + const char **block_names; + size_t block_name_count; +-- +2.45.2 + diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch new file mode 100644 index 00000000..bf778c6a --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-39cbef9e018ee760ffd175fdd6729e47052.patch @@ -0,0 +1,1105 @@ +From f5513fb3ce1827645b74a43d1486c60d4dcdfe9b Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 3 Dec 2024 09:14:28 +1100 +Subject: [PATCH] Updated vkd3d to 39cbef9e018ee760ffd175fdd6729e470529fb77. 
+ +--- + libs/vkd3d/include/vkd3d_shader.h | 194 +++++++ + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 + + libs/vkd3d/libs/vkd3d-shader/ir.c | 498 +++++++++++++++++- + libs/vkd3d/libs/vkd3d-shader/msl.c | 34 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 99 ++-- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + + 6 files changed, 746 insertions(+), 82 deletions(-) + +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index cb561d7f079..af55d63a5c8 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -476,6 +476,109 @@ enum vkd3d_shader_binding_flag + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), + }; + ++/** ++ * The factor used to interpolate the fragment output colour with fog. ++ * ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for specification of the ++ * interpolation factor as defined here. ++ * ++ * The following variables may be used to determine the interpolation factor: ++ * ++ * c = The fog coordinate value output from the vertex shader. This is an ++ * inter-stage varying with the semantic name "FOG" and semantic index 0. ++ * It may be modified by VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE. ++ * E = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_END. ++ * k = The value of VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE. ++ * ++ * \since 1.15 ++ */ ++enum vkd3d_shader_fog_fragment_mode ++{ ++ /** ++ * No fog interpolation is applied; ++ * the output colour is passed through unmodified. ++ * Equivalently, the fog interpolation factor is 1. ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_NONE = 0x0, ++ /** ++ * The fog interpolation factor is 2^-(k * c). ++ * ++ * In order to implement traditional exponential fog, as present in ++ * Direct3D and OpenGL, i.e. ++ * ++ * e^-(density * c) ++ * ++ * set ++ * ++ * k = density * log₂(e) ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_EXP = 0x1, ++ /** ++ * The fog interpolation factor is 2^-((k * c)²). 
++ * ++ * In order to implement traditional square-exponential fog, as present in ++ * Direct3D and OpenGL, i.e. ++ * ++ * e^-((density * c)²) ++ * ++ * set ++ * ++ * k = density * √log₂(e) ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_EXP2 = 0x2, ++ /** ++ * The fog interpolation factor is (E - c) * k. ++ * ++ * In order to implement traditional linear fog, as present in Direct3D and ++ * OpenGL, i.e. ++ * ++ * (end - c) / (end - start) ++ * ++ * set ++ * ++ * E = end ++ * k = 1 / (end - start) ++ */ ++ VKD3D_SHADER_FOG_FRAGMENT_LINEAR = 0x3, ++}; ++ ++/** ++ * The source of the fog varying output by a pre-rasterization shader. ++ * The fog varying is defined as the output varying with the semantic name "FOG" ++ * and semantic index 0. ++ * ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE for further documentation of this ++ * parameter. ++ * ++ * \since 1.15 ++ */ ++enum vkd3d_shader_fog_source ++{ ++ /** ++ * The source shader is not modified. That is, the fog varying in the target ++ * shader is the original fog varying if and only if present. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_FOG = 0x0, ++ /** ++ * If the source shader has a fog varying, it is not modified. ++ * Otherwise, if the source shader outputs a varying with semantic name ++ * "COLOR" and semantic index 1 that includes a W component, ++ * said W component is output as the fog varying. ++ * Otherwise, no fog varying is output. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W = 0x1, ++ /** ++ * The fog source is the Z component of the position output by the vertex ++ * shader. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_Z = 0x2, ++ /** ++ * The fog source is the W component of the position output by the vertex ++ * shader. ++ */ ++ VKD3D_SHADER_FOG_SOURCE_W = 0x3, ++}; ++ + /** + * The manner in which a parameter value is provided to the shader, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1.
+@@ -739,6 +842,97 @@ enum vkd3d_shader_parameter_name + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, ++ /** ++ * Fog mode used in fragment shaders. ++ * ++ * The value specified by this parameter must be a member of ++ * enum vkd3d_shader_fog_fragment_mode. ++ * ++ * If not VKD3D_SHADER_FOG_FRAGMENT_NONE, the pixel shader colour output at ++ * location 0 is linearly interpolated with the fog colour defined by ++ * VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR. The interpolation factor is ++ * defined according to the enumerant selected by this parameter. ++ * The interpolated value is then outputted instead of the original value at ++ * location 0. ++ * ++ * An interpolation factor of 0 specifies to use the fog colour; a factor of ++ * 1 specifies to use the original colour output. The interpolation factor ++ * is clamped to the [0, 1] range before interpolating. ++ * ++ * The default value is VKD3D_SHADER_FOG_FRAGMENT_NONE. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE, ++ /** ++ * Fog colour. ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of ++ * fog. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32_VEC4. ++ * ++ * The default value is transparent black, i.e. the vector {0, 0, 0, 0}. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, ++ /** ++ * End coordinate for linear fog. ++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of ++ * fog. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. ++ * ++ * The default value is 1.0. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_END, ++ /** ++ * Scale value for fog. 
++ * See VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE for documentation of ++ * fog. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. ++ * ++ * The default value is 1.0. ++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, ++ /** ++ * Fog source. The value specified by this parameter must be a member of ++ * enum vkd3d_shader_fog_source. ++ * ++ * This parameter replaces or supplements the fog varying output by a ++ * pre-rasterization shader. The fog varying is defined as the output ++ * varying with the semantic name "FOG" and semantic index 0. ++ * ++ * Together with other fog parameters, this parameter can be used to ++ * implement fixed-function fog, as present in Direct3D versions up to 9, ++ * if the target environment does not support fog as part of its own ++ * fixed-function API (as Vulkan and core OpenGL). ++ * ++ * The default value is VKD3D_SHADER_FOG_SOURCE_FOG. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader.
++ * ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), + }; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index bda9bc72f56..7db658fb541 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -968,6 +968,8 @@ static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const + + if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_POINT_SIZE) + sm1->p.program->has_point_size = true; ++ if (dst_param->reg.type == VKD3DSPR_RASTOUT && dst_param->reg.idx[0].offset == VSIR_RASTOUT_FOG) ++ sm1->p.program->has_fog = true; + } + + static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 0c06db9ff15..53b26dac76e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -1,5 +1,6 @@ + /* + * Copyright 2023 Conor McCarthy for CodeWeavers ++ * Copyright 2023-2024 Elizabeth Figura for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public +@@ -222,6 +223,14 @@ static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_ + src->reg.idx[0].offset = idx; + } + ++static void src_param_init_parameter_vec4(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) ++{ ++ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); ++ src->reg.idx[0].offset = idx; ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++} ++ + static void vsir_src_param_init_resource(struct vkd3d_shader_src_param *src, unsigned int id, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 2); +@@ -251,6 +260,14 @@ static void src_param_init_ssa_float(struct 
vkd3d_shader_src_param *src, unsigne + src->reg.idx[0].offset = idx; + } + ++static void src_param_init_ssa_float4(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ src->reg.idx[0].offset = idx; ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = VKD3D_SHADER_NO_SWIZZLE; ++} ++ + static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); +@@ -306,6 +323,14 @@ static void dst_param_init_ssa_float(struct vkd3d_shader_dst_param *dst, unsigne + dst->reg.idx[0].offset = idx; + } + ++static void dst_param_init_ssa_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_FLOAT, 1); ++ dst->reg.idx[0].offset = idx; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = VKD3DSP_WRITEMASK_ALL; ++} ++ + static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) + { + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); +@@ -864,11 +889,36 @@ static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, + return VKD3D_OK; + } + ++static bool add_signature_element(struct shader_signature *signature, const char *semantic_name, ++ uint32_t semantic_index, uint32_t mask, uint32_t register_index, ++ enum vkd3d_shader_interpolation_mode interpolation_mode) ++{ ++ struct signature_element *new_elements, *e; ++ ++ if (!(new_elements = vkd3d_realloc(signature->elements, ++ (signature->element_count + 1) * sizeof(*signature->elements)))) ++ return false; ++ signature->elements = new_elements; ++ e = &signature->elements[signature->element_count++]; ++ memset(e, 0, sizeof(*e)); ++ e->semantic_name = vkd3d_strdup(semantic_name); ++ e->semantic_index = semantic_index; ++ e->sysval_semantic = VKD3D_SHADER_SV_NONE; ++ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ 
e->register_count = 1; ++ e->mask = mask; ++ e->used_mask = mask; ++ e->register_index = register_index; ++ e->target_location = register_index; ++ e->interpolation_mode = interpolation_mode; ++ return true; ++} ++ + static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) + { + struct shader_signature *signature = &program->output_signature; +- struct signature_element *new_elements, *e; ++ struct signature_element *e; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) + return VKD3D_OK; +@@ -881,22 +931,8 @@ static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *pr + return VKD3D_OK; + } + +- if (!(new_elements = vkd3d_realloc(signature->elements, +- (signature->element_count + 1) * sizeof(*signature->elements)))) ++ if (!add_signature_element(signature, "COLOR", 0, VKD3DSP_WRITEMASK_ALL, SM1_COLOR_REGISTER_OFFSET, VKD3DSIM_NONE)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- signature->elements = new_elements; +- e = &signature->elements[signature->element_count++]; +- memset(e, 0, sizeof(*e)); +- e->semantic_name = vkd3d_strdup("COLOR"); +- e->sysval_semantic = VKD3D_SHADER_SV_NONE; +- e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; +- e->register_count = 1; +- e->mask = VKD3DSP_WRITEMASK_ALL; +- e->used_mask = VKD3DSP_WRITEMASK_ALL; +- e->register_index = SM1_COLOR_REGISTER_OFFSET; +- e->target_location = SM1_COLOR_REGISTER_OFFSET; +- e->interpolation_mode = VKD3DSIM_NONE; +- + return VKD3D_OK; + } + +@@ -1051,6 +1087,9 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + + e->target_location = map->input_register_index; + ++ TRACE("Mapping signature index %u (mask %#x) to target location %u (mask %#x).\n", ++ i, e->mask, map->input_register_index, map->input_mask); ++ + if ((input_mask & e->mask) == input_mask) + { + ++subset_varying_count; +@@ -1071,6 +1110,8 @@ static enum vkd3d_result 
vsir_program_remap_output_signature(struct vsir_program + } + else + { ++ TRACE("Marking signature index %u (mask %#x) as unused.\n", i, e->mask); ++ + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; + } + +@@ -6685,6 +6726,423 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr + return VKD3D_OK; + } + ++static enum vkd3d_result vsir_program_add_fog_input(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct shader_signature *signature = &program->input_signature; ++ uint32_t register_idx = 0; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ if (!vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE)) ++ return VKD3D_OK; ++ ++ /* We could check the value and skip this if NONE, but chances are if a ++ * user specifies the fog fragment mode as a parameter, they'll want to ++ * enable it dynamically. Always specifying it (and hence always outputting ++ * it from the VS) avoids an extra VS variant. 
*/ ++ ++ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) ++ return VKD3D_OK; ++ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ register_idx = max(register_idx, signature->elements[i].register_index + 1); ++ ++ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_fragment_mode mode, ++ uint32_t fog_signature_idx, uint32_t colour_signature_idx, uint32_t colour_temp, ++ size_t *ret_pos, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ struct vkd3d_shader_location loc = ret->location; ++ uint32_t ssa_factor = program->ssa_count++; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ uint32_t ssa_temp, ssa_temp2; ++ ++ switch (mode) ++ { ++ case VKD3D_SHADER_FOG_FRAGMENT_LINEAR: ++ /* We generate the following code: ++ * ++ * add sr0, FOG_END, -vFOG.x ++ * mul_sat srFACTOR, sr0, FOG_SCALE ++ */ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ *ret_pos = pos + 4; ++ ++ ssa_temp = program->ssa_count++; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_ADD, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_END, VKD3D_DATA_FLOAT); ++ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); ++ ins->src[1].reg.idx[0].offset = fog_signature_idx; ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ins->src[1].modifiers = VKD3DSPSM_NEG; ++ ++ 
vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); ++ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; ++ src_param_init_ssa_float(&ins->src[0], ssa_temp); ++ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); ++ break; ++ ++ case VKD3D_SHADER_FOG_FRAGMENT_EXP: ++ /* We generate the following code: ++ * ++ * mul sr0, FOG_SCALE, vFOG.x ++ * exp_sat srFACTOR, -sr0 ++ */ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ *ret_pos = pos + 4; ++ ++ ssa_temp = program->ssa_count++; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); ++ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); ++ ins->src[1].reg.idx[0].offset = fog_signature_idx; ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); ++ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; ++ src_param_init_ssa_float(&ins->src[0], ssa_temp); ++ ins->src[0].modifiers = VKD3DSPSM_NEG; ++ break; ++ ++ case VKD3D_SHADER_FOG_FRAGMENT_EXP2: ++ /* We generate the following code: ++ * ++ * mul sr0, FOG_SCALE, vFOG.x ++ * mul sr1, sr0, sr0 ++ * exp_sat srFACTOR, -sr1 ++ */ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ *ret_pos = pos + 5; ++ ++ ssa_temp = program->ssa_count++; ++ ssa_temp2 = program->ssa_count++; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MUL, 1, 2); 
++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp); ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, VKD3D_DATA_FLOAT); ++ vsir_src_param_init(&ins->src[1], VKD3DSPR_INPUT, VKD3D_DATA_FLOAT, 1); ++ ins->src[1].reg.idx[0].offset = fog_signature_idx; ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MUL, 1, 2); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_temp2); ++ src_param_init_ssa_float(&ins->src[0], ssa_temp); ++ src_param_init_ssa_float(&ins->src[1], ssa_temp); ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_EXP, 1, 1); ++ dst_param_init_ssa_float(&ins->dst[0], ssa_factor); ++ ins->dst[0].modifiers = VKD3DSPDM_SATURATE; ++ src_param_init_ssa_float(&ins->src[0], ssa_temp2); ++ ins->src[0].modifiers = VKD3DSPSM_NEG; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ /* We generate the following code: ++ * ++ * add sr0, FRAG_COLOUR, -FOG_COLOUR ++ * mad oC0, sr0, srFACTOR, FOG_COLOUR ++ */ ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_ADD, 1, 2); ++ dst_param_init_ssa_float4(&ins->dst[0], program->ssa_count++); ++ src_param_init_temp_float4(&ins->src[0], colour_temp); ++ src_param_init_parameter_vec4(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); ++ ins->src[1].modifiers = VKD3DSPSM_NEG; ++ ++ vsir_instruction_init_with_params(program, ++ins, &loc, VKD3DSIH_MAD, 1, 3); ++ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, colour_signature_idx, ++ program->output_signature.elements[colour_signature_idx].mask); ++ src_param_init_ssa_float4(&ins->src[0], program->ssa_count - 1); ++ src_param_init_ssa_float(&ins->src[1], ssa_factor); ++ src_param_init_parameter_vec4(&ins->src[2], VKD3D_SHADER_PARAMETER_NAME_FOG_COLOUR, VKD3D_DATA_FLOAT); ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result 
vsir_program_insert_fragment_fog(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_message_context *message_context = ctx->message_context; ++ uint32_t colour_signature_idx, fog_signature_idx, colour_temp; ++ const struct vkd3d_shader_parameter1 *mode_parameter = NULL; ++ static const struct vkd3d_shader_location no_loc; ++ const struct signature_element *fog_element; ++ enum vkd3d_shader_fog_fragment_mode mode; ++ struct vkd3d_shader_instruction *ins; ++ size_t new_pos; ++ int ret; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)) ++ return VKD3D_OK; ++ ++ if (!(mode_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_FRAGMENT_MODE))) ++ return VKD3D_OK; ++ ++ if (mode_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported fog fragment mode parameter type %#x.", mode_parameter->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (mode_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid fog fragment mode parameter data type %#x.", mode_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ mode = mode_parameter->u.immediate_constant.u.u32; ++ ++ if (mode == VKD3D_SHADER_FOG_FRAGMENT_NONE) ++ return VKD3D_OK; ++ ++ /* Should have been added by vsir_program_add_fog_input(). 
*/ ++ if (!(fog_element = vsir_signature_find_element_by_name(&program->input_signature, "FOG", 0))) ++ { ++ ERR("Fog input not found.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ fog_signature_idx = fog_element - program->input_signature.elements; ++ ++ /* We're going to be reading from the output, so we need to go ++ * through the whole shader and convert it to a temp. */ ++ colour_temp = program->temp_count++; ++ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ if ((ret = insert_fragment_fog_before_ret(program, ins, mode, fog_signature_idx, ++ colour_signature_idx, colour_temp, &new_pos, message_context)) < 0) ++ return ret; ++ i = new_pos; ++ continue; ++ } ++ ++ for (size_t j = 0; j < ins->dst_count; ++j) ++ { ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; ++ ++ /* Note we run after I/O normalization. */ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) ++ { ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = colour_temp; ++ } ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_add_fog_output(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct shader_signature *signature = &program->output_signature; ++ const struct vkd3d_shader_parameter1 *source_parameter; ++ uint32_t register_idx = 0; ++ ++ if (!is_pre_rasterization_shader(program->shader_version.type)) ++ return VKD3D_OK; ++ ++ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) ++ return VKD3D_OK; ++ ++ if (source_parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ enum vkd3d_shader_fog_source source = source_parameter->u.immediate_constant.u.u32; ++ ++ if (source == VKD3D_SHADER_FOG_SOURCE_FOG) ++ return VKD3D_OK; ++ ++ if (source == 
VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W ++ && !vsir_signature_find_element_by_name(signature, "COLOR", 1)) ++ return VKD3D_OK; ++ } ++ ++ if (vsir_signature_find_element_by_name(signature, "FOG", 0)) ++ return VKD3D_OK; ++ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ register_idx = max(register_idx, signature->elements[i].register_index + 1); ++ ++ if (!add_signature_element(signature, "FOG", 0, VKD3DSP_WRITEMASK_0, register_idx, VKD3DSIM_LINEAR)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_fog_source source, uint32_t temp, ++ uint32_t fog_signature_idx, uint32_t source_signature_idx, size_t *ret_pos) ++{ ++ const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; ++ struct vkd3d_shader_location loc = ret->location; /* Copied up front: the insertion below may invalidate ret. */ ++ size_t pos = ret - program->instructions.elements; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ /* Write the fog output. */ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); ++ src_param_init_temp_float4(&ins->src[0], temp); ++ if (source == VKD3D_SHADER_FOG_SOURCE_Z) ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z); ++ else /* Position or specular W. */ ++ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ ++ins; ++ ++ /* Write the position or specular output.
*/ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), ++ source_signature_idx, e->mask); ++ src_param_init_temp_float4(&ins->src[0], temp); ++ ++ins; ++ ++ *ret_pos = pos + 2; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_insert_vertex_fog(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct vkd3d_shader_message_context *message_context = ctx->message_context; ++ const struct vkd3d_shader_parameter1 *source_parameter = NULL; ++ uint32_t fog_signature_idx, source_signature_idx, temp; ++ static const struct vkd3d_shader_location no_loc; ++ enum vkd3d_shader_fog_source source; ++ const struct signature_element *e; ++ ++ if (!is_pre_rasterization_shader(program->shader_version.type)) ++ return VKD3D_OK; ++ ++ if (!(source_parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FOG_SOURCE))) ++ return VKD3D_OK; ++ ++ if (source_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported fog source parameter type %#x.", source_parameter->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (source_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid fog source parameter data type %#x.", source_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ source = source_parameter->u.immediate_constant.u.u32; ++ ++ TRACE("Fog source %#x.\n", source); ++ ++ if (source == VKD3D_SHADER_FOG_SOURCE_FOG) ++ return VKD3D_OK; ++ ++ if (source == VKD3D_SHADER_FOG_SOURCE_FOG_OR_SPECULAR_W) ++ { ++ if (program->has_fog || !(e = vsir_signature_find_element_by_name(&program->output_signature, "COLOR", 1))) ++ return VKD3D_OK; ++
source_signature_idx = e - program->output_signature.elements; ++ } ++ else ++ { ++ if (!vsir_signature_find_sysval(&program->output_signature, ++ VKD3D_SHADER_SV_POSITION, 0, &source_signature_idx)) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, ++ VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ } ++ ++ if (!(e = vsir_signature_find_element_by_name(&program->output_signature, "FOG", 0))) ++ { ++ ERR("Fog output not found.\n"); ++ return VKD3D_ERROR_INVALID_SHADER; ++ } ++ fog_signature_idx = e - program->output_signature.elements; ++ ++ temp = program->temp_count++; ++ ++ /* Insert a fog write before each ret, and convert either specular or ++ * position output to a temp. */ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ size_t new_pos; ++ int ret; ++ ++ if ((ret = insert_vertex_fog_before_ret(program, ins, source, temp, ++ fog_signature_idx, source_signature_idx, &new_pos)) < 0) ++ return ret; ++ i = new_pos; ++ continue; ++ } ++ ++ for (size_t j = 0; j < ins->dst_count; ++j) ++ { ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; ++ ++ /* Note we run after I/O normalization. */ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == source_signature_idx) ++ { ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = temp; ++ } ++ } ++ } ++ ++ program->has_fog = true; ++ ++ return VKD3D_OK; ++} ++ + struct validation_context + { + struct vkd3d_shader_message_context *message_context; +@@ -8769,6 +9227,12 @@ enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uin + if (program->shader_version.major <= 2) + vsir_transform(&ctx, vsir_program_add_diffuse_output); + ++ /* For vsir_program_insert_fragment_fog(). 
*/ ++ vsir_transform(&ctx, vsir_program_add_fog_input); ++ ++ /* For vsir_program_insert_vertex_fog(). */ ++ vsir_transform(&ctx, vsir_program_add_fog_output); ++ + return ctx.result; + } + +@@ -8823,6 +9287,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t + vsir_transform(&ctx, vsir_program_insert_point_size); + vsir_transform(&ctx, vsir_program_insert_point_size_clamp); + vsir_transform(&ctx, vsir_program_insert_point_coord); ++ vsir_transform(&ctx, vsir_program_insert_fragment_fog); ++ vsir_transform(&ctx, vsir_program_insert_vertex_fog); + + if (TRACE_ON()) + vsir_program_trace(program); +diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c +index 29f51088728..9a3c3ed885e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/msl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/msl.c +@@ -41,6 +41,8 @@ struct msl_generator + const char *prefix; + bool failed; + ++ bool write_depth; ++ + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; + }; +@@ -153,6 +155,14 @@ static void msl_print_register_name(struct vkd3d_string_buffer *buffer, + msl_print_register_datatype(buffer, gen, reg->data_type); + break; + ++ case VKD3DSPR_DEPTHOUT: ++ if (gen->program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled depth output in shader type #%x.", ++ gen->program->shader_version.type); ++ vkd3d_string_buffer_printf(buffer, "o_depth"); ++ break; ++ + case VKD3DSPR_IMMCONST: + switch (reg->dimension) + { +@@ -335,7 +345,8 @@ static uint32_t msl_dst_init(struct msl_dst *msl_dst, struct msl_generator *gen, + msl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); + + msl_print_register_name(msl_dst->register_name, gen, &vsir_dst->reg); +- msl_print_write_mask(msl_dst->mask, write_mask); ++ if (vsir_dst->reg.dimension == VSIR_DIMENSION_VEC4) ++ 
msl_print_write_mask(msl_dst->mask, write_mask); + + return write_mask; + } +@@ -827,6 +838,14 @@ static void msl_generate_output_struct_declarations(struct msl_generator *gen) + { + e = &signature->elements[i]; + ++ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) ++ { ++ gen->write_depth = true; ++ msl_print_indent(gen->buffer, 1); ++ vkd3d_string_buffer_printf(buffer, "float shader_out_depth [[depth(any)]];\n"); ++ continue; ++ } ++ + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + +@@ -936,6 +955,12 @@ static void msl_generate_entrypoint_epilogue(struct msl_generator *gen) + { + e = &signature->elements[i]; + ++ if (e->sysval_semantic == VKD3D_SHADER_SV_DEPTH) ++ { ++ vkd3d_string_buffer_printf(buffer, " output.shader_out_depth = shader_out_depth;\n"); ++ continue; ++ } ++ + if (e->target_location == SIGNATURE_TARGET_LOCATION_UNUSED) + continue; + +@@ -995,9 +1020,14 @@ static void msl_generate_entrypoint(struct msl_generator *gen) + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_vec4 %s_out[%u];\n", gen->prefix, 32); + vkd3d_string_buffer_printf(gen->buffer, " vkd3d_%s_out output;\n", gen->prefix); + ++ if (gen->write_depth) ++ vkd3d_string_buffer_printf(gen->buffer, " float shader_out_depth;\n"); ++ + msl_generate_entrypoint_prologue(gen); + + vkd3d_string_buffer_printf(gen->buffer, " %s_main(%s_in, %s_out", gen->prefix, gen->prefix, gen->prefix); ++ if (gen->write_depth) ++ vkd3d_string_buffer_printf(gen->buffer, ", shader_out_depth"); + if (gen->descriptor_info->descriptor_count) + vkd3d_string_buffer_printf(gen->buffer, ", descriptors"); + vkd3d_string_buffer_printf(gen->buffer, ");\n"); +@@ -1035,6 +1065,8 @@ static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader + "void %s_main(thread vkd3d_vec4 *v, " + "thread vkd3d_vec4 *o", + gen->prefix); ++ if (gen->write_depth) ++ vkd3d_string_buffer_printf(gen->buffer, ", thread float& o_depth"); + if (gen->descriptor_info->descriptor_count) + 
vkd3d_string_buffer_printf(gen->buffer, ", constant vkd3d_%s_descriptors& descriptors", gen->prefix); + vkd3d_string_buffer_printf(gen->buffer, ")\n{\n"); +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 005b40a9d1f..649f92a57f3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -2406,6 +2406,7 @@ struct vkd3d_hull_shader_variables + struct ssa_register_info + { + enum vkd3d_data_type data_type; ++ uint8_t write_mask; + uint32_t id; + }; + +@@ -3315,13 +3316,19 @@ static uint32_t spirv_compiler_emit_variable(struct spirv_compiler *compiler, + static const struct vkd3d_spec_constant_info + { + enum vkd3d_shader_parameter_name name; +- uint32_t default_value; ++ union ++ { ++ uint32_t u; ++ float f; ++ } default_value; + const char *debug_name; + } + vkd3d_shader_parameters[] = + { +- {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, +- {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, ++ {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, {.u = 1}, "sample_count"}, ++ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, {.f = 0.0f}, "alpha_test_ref"}, ++ {VKD3D_SHADER_PARAMETER_NAME_FOG_END, {.f = 1.0f}, "fog_end"}, ++ {VKD3D_SHADER_PARAMETER_NAME_FOG_SCALE, {.f = 1.0f}, "fog_scale"}, + }; + + static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) +@@ -3382,7 +3389,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile + const struct vkd3d_spec_constant_info *info; + + info = get_spec_constant_info(name); +- default_value = info ? info->default_value : 0; ++ default_value = info ? 
info->default_value.u : 0; + + scalar_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + vector_type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), component_count); +@@ -3573,6 +3580,24 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, + register_info->is_aggregate = false; + return true; + } ++ else if (reg->type == VKD3DSPR_SSA) ++ { ++ const struct ssa_register_info *ssa = &compiler->ssa_register_info[reg->idx[0].offset]; ++ ++ if (!ssa->id) ++ { ++ /* Should only be from a missing instruction implementation. */ ++ VKD3D_ASSERT(compiler->failed); ++ return 0; ++ } ++ ++ memset(register_info, 0, sizeof(*register_info)); ++ register_info->id = ssa->id; ++ register_info->storage_class = SpvStorageClassMax; ++ register_info->component_type = vkd3d_component_type_from_data_type(ssa->data_type); ++ register_info->write_mask = ssa->write_mask; ++ return true; ++ } + + vkd3d_symbol_make_register(®_symbol, reg); + if (!(entry = rb_get(&compiler->symbol_table, ®_symbol))) +@@ -4180,67 +4205,14 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil + return const_id; + } + +-static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg) +-{ +- VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); +- VKD3D_ASSERT(reg->idx_count == 1); +- return &compiler->ssa_register_info[reg->idx[0].offset]; +-} +- + static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg, uint32_t val_id) ++ const struct vkd3d_shader_register *reg, uint32_t write_mask, uint32_t val_id) + { + unsigned int i = reg->idx[0].offset; + VKD3D_ASSERT(i < compiler->ssa_register_count); + compiler->ssa_register_info[i].data_type = reg->data_type; + compiler->ssa_register_info[i].id = val_id; +-} +- 
+-static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register *reg, enum vkd3d_shader_component_type component_type, +- uint32_t swizzle) +-{ +- struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +- enum vkd3d_shader_component_type reg_component_type; +- const struct ssa_register_info *ssa; +- unsigned int component_idx; +- uint32_t type_id, val_id; +- +- ssa = spirv_compiler_get_ssa_register_info(compiler, reg); +- val_id = ssa->id; +- if (!val_id) +- { +- /* Should only be from a missing instruction implementation. */ +- VKD3D_ASSERT(compiler->failed); +- return 0; +- } +- VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); +- +- reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); +- +- if (reg->dimension == VSIR_DIMENSION_SCALAR) +- { +- if (component_type != reg_component_type) +- { +- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); +- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); +- } +- +- return val_id; +- } +- +- if (component_type != reg_component_type) +- { +- /* Required for resource loads with sampled type int, because DXIL has no signedness. +- * Only 128-bit vector sizes are used. 
*/ +- type_id = vkd3d_spirv_get_type_id(builder, component_type, VKD3D_VEC4_SIZE); +- val_id = vkd3d_spirv_build_op_bitcast(builder, type_id, val_id); +- } +- +- type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); +- component_idx = vsir_swizzle_get_component(swizzle, 0); +- return vkd3d_spirv_build_op_composite_extract1(builder, type_id, val_id, component_idx); ++ compiler->ssa_register_info[i].write_mask = write_mask; + } + + static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -4266,9 +4238,6 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + component_count = vsir_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); + +- if (reg->type == VKD3DSPR_SSA) +- return spirv_compiler_emit_load_ssa_reg(compiler, reg, component_type, swizzle); +- + if (!spirv_compiler_get_register_info(compiler, reg, ®_info)) + { + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); +@@ -4293,9 +4262,9 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + type_id = vkd3d_spirv_get_type_id(builder, + reg_info.component_type, vsir_write_mask_component_count(reg_info.write_mask)); + val_id = vkd3d_spirv_build_op_load(builder, type_id, reg_info.id, SpvMemoryAccessMaskNone); ++ swizzle = data_type_is_64_bit(reg->data_type) ? vsir_swizzle_32_from_64(swizzle) : swizzle; + } + +- swizzle = data_type_is_64_bit(reg->data_type) ? 
vsir_swizzle_32_from_64(swizzle) : swizzle; + val_id = spirv_compiler_emit_swizzle(compiler, + val_id, reg_info.write_mask, reg_info.component_type, swizzle, val_write_mask); + +@@ -4496,7 +4465,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, + + if (reg->type == VKD3DSPR_SSA) + { +- spirv_compiler_set_ssa_register_info(compiler, reg, val_id); ++ spirv_compiler_set_ssa_register_info(compiler, reg, write_mask, val_id); + return; + } + +@@ -7431,7 +7400,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, + + general_implementation: + write_mask = dst->write_mask; +- if (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) ++ if (data_type_is_64_bit(src->reg.data_type) && !data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_64_from_32(write_mask); + else if (!data_type_is_64_bit(src->reg.data_type) && data_type_is_64_bit(dst->reg.data_type)) + write_mask = vsir_write_mask_32_from_64(write_mask); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index ad04972b3fb..55b28cdd875 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1428,6 +1428,7 @@ struct vsir_program + bool use_vocp; + bool has_point_size; + bool has_point_coord; ++ bool has_fog; + uint8_t diffuse_written_mask; + enum vsir_control_flow_type cf_type; + enum vsir_normalisation_level normalisation_level; +-- +2.45.2 +