diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch similarity index 68% rename from patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch rename to patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch index 8df96103..51e6c899 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-e383834049825dde8feb0a230c39d499e58.patch @@ -1,42 +1,44 @@ -From aea35abd0efd5cca9e6af5d894539fcb6de6784e Mon Sep 17 00:00:00 2001 +From abcbb54af650bd1699f695bdbbffcbffe6ef84fe Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Sep 2024 07:18:49 +1000 -Subject: [PATCH] Updated vkd3d to 03ad04c89004c7f800c5b1a0ea7ba28622916328. +Subject: [PATCH] Updated vkd3d to e383834049825dde8feb0a230c39d499e580cdf1. --- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 4 +- - libs/vkd3d/include/vkd3d_shader.h | 159 +- + libs/vkd3d/include/vkd3d.h | 1 + + libs/vkd3d/include/vkd3d_shader.h | 219 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/checksum.c | 49 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 94 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1270 ++----- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 107 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 1262 ++--- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 21 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 116 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 1001 ++++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 2107 ++++++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 349 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 134 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 3 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1046 ++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1789 ++++++++- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 167 +- + 
libs/vkd3d/libs/vkd3d-shader/fx.c | 2016 ++++++-- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 2369 +++++++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 430 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 181 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 10 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1348 ++++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 3775 +++++++++++++-- .../libs/vkd3d-shader/hlsl_constant_ops.c | 20 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 3347 +++++++++++------ - libs/vkd3d/libs/vkd3d-shader/msl.c | 881 +++++ + libs/vkd3d/libs/vkd3d-shader/ir.c | 4159 ++++++++++++----- + libs/vkd3d/libs/vkd3d-shader/msl.c | 898 ++++ libs/vkd3d/libs/vkd3d-shader/preproc.h | 3 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 56 +- libs/vkd3d/libs/vkd3d-shader/preproc.y | 13 - - libs/vkd3d/libs/vkd3d-shader/spirv.c | 433 ++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 1360 +++++-- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 145 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 105 +- - libs/vkd3d/libs/vkd3d/command.c | 91 +- - libs/vkd3d/libs/vkd3d/device.c | 1 + - libs/vkd3d/libs/vkd3d/state.c | 383 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 530 ++- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 2647 +++++------ + .../libs/vkd3d-shader/vkd3d_shader_main.c | 269 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 134 +- + libs/vkd3d/libs/vkd3d/command.c | 123 +- + libs/vkd3d/libs/vkd3d/device.c | 175 +- + libs/vkd3d/libs/vkd3d/resource.c | 14 +- + libs/vkd3d/libs/vkd3d/state.c | 410 +- libs/vkd3d/libs/vkd3d/utils.c | 3 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 1 + - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 4 +- - 32 files changed, 11274 insertions(+), 3716 deletions(-) + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 27 +- + 34 files changed, 15965 insertions(+), 5478 deletions(-) create mode 100644 libs/vkd3d/libs/vkd3d-shader/msl.c diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in @@ -73,11 +75,43 @@ index 39145a97df1..fd62730f948 100644 return __builtin_popcount(v); #else v -= (v 
>> 1) & 0x55555555; +diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h +index 398ae2442d6..b18fd14f4c3 100644 +--- a/libs/vkd3d/include/vkd3d.h ++++ b/libs/vkd3d/include/vkd3d.h +@@ -98,6 +98,7 @@ enum vkd3d_api_version + VKD3D_API_VERSION_1_11, + VKD3D_API_VERSION_1_12, + VKD3D_API_VERSION_1_13, ++ VKD3D_API_VERSION_1_14, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_API_VERSION), + }; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d9a355d3bc9..5c0d13ea9e2 100644 +index d9a355d3bc9..cb561d7f079 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -190,6 +190,17 @@ enum vkd3d_shader_compile_option_backward_compatibility +@@ -56,6 +56,7 @@ enum vkd3d_shader_api_version + VKD3D_SHADER_API_VERSION_1_11, + VKD3D_SHADER_API_VERSION_1_12, + VKD3D_SHADER_API_VERSION_1_13, ++ VKD3D_SHADER_API_VERSION_1_14, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_API_VERSION), + }; +@@ -111,6 +112,11 @@ enum vkd3d_shader_structure_type + * \since 1.13 + */ + VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, ++ /** ++ * The structure is a vkd3d_shader_scan_hull_shader_tessellation_info structure. ++ * \since 1.15 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -190,6 +196,17 @@ enum vkd3d_shader_compile_option_backward_compatibility * - DEPTH to SV_Depth for pixel shader outputs. */ VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES = 0x00000001, @@ -95,7 +129,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY), }; -@@ -469,8 +480,8 @@ enum vkd3d_shader_parameter_type +@@ -469,8 +486,8 @@ enum vkd3d_shader_parameter_type /** The parameter value is embedded directly in the shader. 
*/ VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, /** @@ -106,7 +140,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 */ VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, /** -@@ -495,6 +506,13 @@ enum vkd3d_shader_parameter_data_type +@@ -495,6 +512,13 @@ enum vkd3d_shader_parameter_data_type VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, /** The parameter is provided as a 32-bit float. \since 1.13 */ VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, @@ -120,7 +154,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), }; -@@ -578,6 +596,110 @@ enum vkd3d_shader_parameter_name +@@ -578,6 +602,143 @@ enum vkd3d_shader_parameter_name * \since 1.13 */ VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, @@ -228,10 +262,43 @@ index d9a355d3bc9..5c0d13ea9e2 100644 + * \since 1.14 + */ + VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, ++ /** ++ * Whether texture coordinate inputs should take their values from the ++ * point coordinate. ++ * ++ * When this parameter is provided to a pixel shader, and the value is ++ * nonzero, any fragment shader input with the semantic name "TEXCOORD" ++ * takes its value from the point coordinates instead of from the previous ++ * shader. The point coordinates here are defined as a four-component vector ++ * whose X and Y components are the X and Y coordinates of the fragment ++ * within a point being rasterized, and whose Z and W components are zero. ++ * ++ * In GLSL, the X and Y components are drawn from gl_PointCoord; in SPIR-V, ++ * they are drawn from a variable with the BuiltinPointCoord decoration. ++ * ++ * This includes t# fragment shader inputs in shader model 2 shaders, ++ * as well as texture sampling in shader model 1 shaders. ++ * ++ * This parameter can be used to implement fixed function point sprite, as ++ * present in Direct3D versions 8 and 9, if the target environment does not ++ * support point sprite as part of its own fixed-function API (as Vulkan and ++ * core OpenGL). 
++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * The default value is zero, i.e. use the original varyings. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.14 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; -@@ -625,6 +747,13 @@ struct vkd3d_shader_parameter_immediate_constant1 +@@ -625,6 +786,13 @@ struct vkd3d_shader_parameter_immediate_constant1 * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. */ float f32; @@ -245,7 +312,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 void *_pointer_pad; uint32_t _pad[4]; } u; -@@ -636,7 +765,13 @@ struct vkd3d_shader_parameter_immediate_constant1 +@@ -636,7 +804,13 @@ struct vkd3d_shader_parameter_immediate_constant1 */ struct vkd3d_shader_parameter_specialization_constant { @@ -260,7 +327,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 uint32_t id; }; -@@ -1046,6 +1181,11 @@ enum vkd3d_shader_source_type +@@ -1046,6 +1220,11 @@ enum vkd3d_shader_source_type * the format used for Direct3D shader model 6 shaders. \since 1.9 */ VKD3D_SHADER_SOURCE_DXBC_DXIL, @@ -272,7 +339,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), }; -@@ -1087,6 +1227,10 @@ enum vkd3d_shader_target_type +@@ -1087,6 +1266,10 @@ enum vkd3d_shader_target_type * Output is a raw FX section without container. \since 1.11 */ VKD3D_SHADER_TARGET_FX, @@ -283,7 +350,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE), }; -@@ -1292,7 +1436,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, +@@ -1292,7 +1475,8 @@ typedef int (*PFN_vkd3d_shader_open_include)(const char *filename, bool local, * vkd3d_shader_preprocess_info. 
* * \param code Contents of the included file, which were allocated by the @@ -293,7 +360,7 @@ index d9a355d3bc9..5c0d13ea9e2 100644 * * \param context The user-defined pointer passed to struct * vkd3d_shader_preprocess_info. -@@ -1319,8 +1464,8 @@ struct vkd3d_shader_preprocess_info +@@ -1319,8 +1503,8 @@ struct vkd3d_shader_preprocess_info /** * Pointer to an array of predefined macros. Each macro in this array will @@ -304,7 +371,42 @@ index d9a355d3bc9..5c0d13ea9e2 100644 * * If the same macro is specified multiple times, only the last value is * used. -@@ -2798,7 +2943,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ +@@ -1861,6 +2045,26 @@ struct vkd3d_shader_scan_combined_resource_sampler_info + unsigned int combined_sampler_count; + }; + ++/** ++ * A chained structure describing the tessellation information in a hull shader. ++ * ++ * This structure extends vkd3d_shader_compile_info. ++ * ++ * \since 1.15 ++ */ ++struct vkd3d_shader_scan_hull_shader_tessellation_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_HULL_SHADER_TESSELLATION_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** The tessellation output primitive. */ ++ enum vkd3d_shader_tessellator_output_primitive output_primitive; ++ /** The tessellation partitioning mode. */ ++ enum vkd3d_shader_tessellator_partitioning partitioning; ++}; ++ + /** + * Data type of a shader varying, returned as part of struct + * vkd3d_shader_signature_element. 
+@@ -2333,6 +2537,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_D3D_BYTECODE + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_DXBC_TPF + * - VKD3D_SHADER_SOURCE_HLSL to VKD3D_SHADER_TARGET_FX ++ * - VKD3D_SHADER_SOURCE_FX to VKD3D_SHADER_TARGET_D3D_ASM + * + * Supported transformations can also be detected at runtime with the functions + * vkd3d_shader_get_supported_source_types() and +@@ -2798,7 +3003,7 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_ * \param input_signature The input signature of the second shader. * * \param count On output, contains the number of entries written into @@ -422,7 +524,7 @@ index d9560628c77..45de1c92513 100644 memcpy(checksum, ctx.digest, sizeof(ctx.digest)); } diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 77e9711300f..38d566d9fe0 100644 +index 77e9711300f..7c5444f63a3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -79,7 +79,7 @@ static const char * const shader_opcode_names[] = @@ -452,7 +554,24 @@ index 77e9711300f..38d566d9fe0 100644 [VKD3DSIH_DDIV ] = "ddiv", [VKD3DSIH_DEF ] = "def", [VKD3DSIH_DEFAULT ] = "default", -@@ -675,9 +675,6 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum +@@ -393,14 +393,13 @@ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type regi + } + } + +-static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, +- enum vkd3d_shader_global_flags global_flags) ++static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) + { + unsigned int i; + + static const struct + { +- enum vkd3d_shader_global_flags flag; ++ enum vsir_global_flags flag; + const char *name; + } + global_flag_info[] = +@@ -675,9 +674,6 @@ static void 
shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum { [VKD3D_DATA_FLOAT ] = "float", [VKD3D_DATA_INT ] = "int", @@ -462,7 +581,22 @@ index 77e9711300f..38d566d9fe0 100644 [VKD3D_DATA_UINT ] = "uint", [VKD3D_DATA_UNORM ] = "unorm", [VKD3D_DATA_SNORM ] = "snorm", -@@ -1229,8 +1226,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1193,6 +1189,14 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); + break; + ++ case VKD3DSPR_PARAMETER: ++ vkd3d_string_buffer_printf(buffer, "parameter"); ++ break; ++ ++ case VKD3DSPR_POINT_COORD: ++ vkd3d_string_buffer_printf(buffer, "vPointCoord"); ++ break; ++ + default: + vkd3d_string_buffer_printf(buffer, "%s%s", + compiler->colours.error, reg->type, compiler->colours.reset); +@@ -1229,8 +1233,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const case VKD3D_DATA_INT: shader_print_int_literal(compiler, "", reg->u.immconst_u32[0], ""); break; @@ -471,7 +605,7 @@ index 77e9711300f..38d566d9fe0 100644 case VKD3D_DATA_UINT: shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); break; -@@ -1266,8 +1261,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1266,8 +1268,6 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[2], ""); shader_print_int_literal(compiler, ", ", reg->u.immconst_u32[3], ""); break; @@ -480,7 +614,7 @@ index 77e9711300f..38d566d9fe0 100644 case VKD3D_DATA_UINT: shader_print_uint_literal(compiler, "", reg->u.immconst_u32[0], ""); shader_print_uint_literal(compiler, ", ", reg->u.immconst_u32[1], ""); -@@ -1319,6 +1312,23 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const +@@ -1319,6 +1319,23 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, 
const } vkd3d_string_buffer_printf(buffer, ")"); } @@ -504,7 +638,7 @@ index 77e9711300f..38d566d9fe0 100644 else if (reg->type != VKD3DSPR_RASTOUT && reg->type != VKD3DSPR_MISCTYPE && reg->type != VKD3DSPR_NULL -@@ -2258,7 +2268,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic +@@ -2258,7 +2275,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic } } @@ -513,7 +647,7 @@ index 77e9711300f..38d566d9fe0 100644 const char *name, const char *register_name, const struct shader_signature *signature) { struct vkd3d_string_buffer *buffer = &compiler->buffer; -@@ -2325,21 +2335,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, +@@ -2325,21 +2342,21 @@ static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, return VKD3D_OK; } @@ -539,7 +673,7 @@ index 77e9711300f..38d566d9fe0 100644 program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", &program->patch_constant_signature)) < 0) return ret; -@@ -2427,7 +2437,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, +@@ -2427,7 +2444,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, * doesn't even have an explicit concept of signature. 
*/ if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) { @@ -548,7 +682,7 @@ index 77e9711300f..38d566d9fe0 100644 { vkd3d_string_buffer_cleanup(buffer); return result; -@@ -2489,12 +2499,58 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, +@@ -2489,12 +2506,58 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, return result; } @@ -611,7 +745,7 @@ index 77e9711300f..38d566d9fe0 100644 end = (const char *)code.code + code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index d05394c3ab7..ae8e864c179 100644 +index d05394c3ab7..bda9bc72f56 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -104,6 +104,12 @@ enum vkd3d_sm1_resource_type @@ -752,11 +886,11 @@ index d05394c3ab7..ae8e864c179 100644 /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + if (!vsir_program_init(program, compile_info, &version, -+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) ++ code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -@@ -1338,9 +1386,6 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c +@@ -1338,23 +1386,19 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c for (i = 0; i < ARRAY_SIZE(program->flat_constant_count); ++i) program->flat_constant_count[i] = get_external_constant_count(&sm1, i); @@ -766,22 +900,14 @@ index d05394c3ab7..ae8e864c179 100644 if (sm1.p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; -@@ -1351,10 +1396,21 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + if (ret < 0) + { +- WARN("Failed to parse shader.\n"); + vsir_program_cleanup(program); return ret; } - return ret; -+ if ((ret = vkd3d_shader_parser_validate(&sm1.p, config_flags)) < 0) -+ { -+ WARN("Failed to validate shader after parsing, ret %d.\n", ret); -+ -+ if (TRACE_ON()) -+ vsir_program_trace(program); -+ -+ vsir_program_cleanup(program); -+ return ret; -+ } -+ + return VKD3D_OK; } @@ -790,7 +916,7 @@ index d05394c3ab7..ae8e864c179 100644 unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) { unsigned int i; -@@ -1384,22 +1440,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, +@@ -1384,22 +1428,22 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, @@ -824,7 +950,7 @@ index d05394c3ab7..ae8e864c179 100644 {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, }; -@@ -1422,33 +1478,33 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, +@@ -1422,33 
+1466,33 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, return false; } @@ -878,7 +1004,7 @@ index d05394c3ab7..ae8e864c179 100644 }; unsigned int i; -@@ -1468,21 +1524,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +@@ -1468,21 +1512,17 @@ bool hlsl_sm1_usage_from_semantic(const char *semantic_name, struct d3dbc_compiler { @@ -904,7 +1030,7 @@ index d05394c3ab7..ae8e864c179 100644 } D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) -@@ -1512,6 +1564,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) +@@ -1512,6 +1552,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -912,7 +1038,15 @@ index d05394c3ab7..ae8e864c179 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1617,6 +1670,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) +@@ -1524,6 +1565,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -1617,6 +1659,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -920,7 +1054,15 @@ index d05394c3ab7..ae8e864c179 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1709,7 +1763,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) +@@ -1629,6 +1672,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -1709,7 +1753,7 @@ static 
void sm1_sort_externs(struct hlsl_ctx *ctx) void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { @@ -929,7 +1071,7 @@ index d05394c3ab7..ae8e864c179 100644 unsigned int uniform_count = 0; struct hlsl_ir_var *var; -@@ -1741,15 +1795,16 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff +@@ -1741,15 +1785,16 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff size_offset = put_u32(buffer, 0); ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B')); @@ -948,7 +1090,7 @@ index d05394c3ab7..ae8e864c179 100644 LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { -@@ -1825,8 +1880,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff +@@ -1825,8 +1870,10 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff switch (comp_type->e.numeric.type) { case HLSL_TYPE_DOUBLE: @@ -961,7 +1103,7 @@ index d05394c3ab7..ae8e864c179 100644 break; case HLSL_TYPE_INT: -@@ -1860,24 +1917,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff +@@ -1860,24 +1907,24 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff set_u32(buffer, creator_offset, offset - ctab_start); ctab_end = bytecode_align(buffer); @@ -991,7 +1133,7 @@ index d05394c3ab7..ae8e864c179 100644 unsigned int writemask; uint32_t reg; } dst; -@@ -1885,7 +1942,7 @@ struct sm1_instruction +@@ -1885,7 +1932,7 @@ struct sm1_instruction struct sm1_src_register { enum vkd3d_shader_register_type type; @@ -1000,7 +1142,7 @@ index d05394c3ab7..ae8e864c179 100644 unsigned int swizzle; uint32_t reg; } srcs[4]; -@@ -1900,11 +1957,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) +@@ -1900,11 +1947,11 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) const struct sm1_dst_register *dst = &instr->dst; unsigned int i; @@ -1015,7 
+1157,7 @@ index d05394c3ab7..ae8e864c179 100644 return false; if (src->type != dst->type) return false; -@@ -1923,13 +1980,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) +@@ -1923,13 +1970,19 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) { VKD3D_ASSERT(reg->writemask); @@ -1037,7 +1179,7 @@ index d05394c3ab7..ae8e864c179 100644 } static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) -@@ -1945,7 +2008,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s +@@ -1945,7 +1998,7 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); if (version->major > 1) @@ -1046,7 +1188,7 @@ index d05394c3ab7..ae8e864c179 100644 put_u32(buffer, token); if (instr->has_dst) -@@ -1955,346 +2018,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s +@@ -1955,346 +2008,112 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s write_sm1_src_register(buffer, &instr->srcs[i]); }; @@ -1057,7 +1199,9 @@ index d05394c3ab7..ae8e864c179 100644 - -static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) --{ ++static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( ++ struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) + { - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, @@ -1115,9 +1259,7 @@ index d05394c3ab7..ae8e864c179 100644 - -static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, - const struct hlsl_reg *dst, const struct hlsl_reg 
*src1, const struct hlsl_reg *src2) -+static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir( -+ struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode) - { +-{ - struct sm1_instruction instr = - { - .opcode = opcode, @@ -1456,7 +1598,7 @@ index d05394c3ab7..ae8e864c179 100644 token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); -@@ -2305,618 +2134,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, +@@ -2305,618 +2124,283 @@ static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, write_sm1_dst_register(buffer, ®); } @@ -1499,12 +1641,14 @@ index d05394c3ab7..ae8e864c179 100644 - } - } -} -- ++ reg_id = semantic->resource.reg.reg.idx[0].offset; + -static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -{ - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = -- { ++ if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER) + { - .opcode = D3DSIO_MOV, - - .dst.type = VKD3DSPR_TEMP, @@ -1594,38 +1738,40 @@ index d05394c3ab7..ae8e864c179 100644 - if (expr->op == HLSL_OP1_REINTERPRET) - { - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -- return; -- } -- -- if (expr->op == HLSL_OP1_CAST) -- { -- d3dbc_write_cast(d3dbc, instr); -- return; -- } -+ reg_id = semantic->resource.reg.reg.idx[0].offset; - -- if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) -+ if (semantic->resource.reg.reg.type != VKD3DSPR_SAMPLER) - { -- /* These need to be lowered. 
*/ -- hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); + vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_TYPE, + "dcl instruction with register type %u.", semantic->resource.reg.reg.type); + d3dbc->failed = true; return; } -- switch (expr->op) +- if (expr->op == HLSL_OP1_CAST) + switch (semantic->resource_type) { +- d3dbc_write_cast(d3dbc, instr); +- return; +- } +- +- if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT) +- { +- /* These need to be lowered. */ +- hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression."); +- return; +- } +- +- switch (expr->op) +- { - case HLSL_OP1_ABS: - d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -- break; -- ++ case VKD3D_SHADER_RESOURCE_TEXTURE_2D: ++ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D); + break; + - case HLSL_OP1_DSX: - d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); -- break; -- ++ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: ++ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE); + break; + - case HLSL_OP1_DSY: - d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); - break; @@ -1640,16 +1786,12 @@ index d05394c3ab7..ae8e864c179 100644 - - case HLSL_OP1_NEG: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -+ case VKD3D_SHADER_RESOURCE_TEXTURE_2D: -+ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_2D); - break; - +- break; +- - case HLSL_OP1_SAT: - d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -+ case VKD3D_SHADER_RESOURCE_TEXTURE_CUBE: -+ d3dbc_write_vsir_sampler_dcl(d3dbc, reg_id, VKD3D_SM1_RESOURCE_TEXTURE_CUBE); - break; - +- break; +- - case HLSL_OP1_RCP: - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); - break; @@ -2259,7 +2401,7 @@ index d05394c3ab7..ae8e864c179 100644 { out->code = buffer->data; out->size = 
buffer->size; -@@ -2925,5 +2419,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, +@@ -2925,5 +2409,5 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, { vkd3d_free(buffer->data); } @@ -2327,10 +2469,51 @@ index 184788dc57e..f6ac8e0829e 100644 set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]); diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 4a17c62292b..f9f44f34bcf 100644 +index 4a17c62292b..d467693bd59 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -3888,7 +3888,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade +@@ -430,6 +430,7 @@ enum dx_intrinsic_opcode + DX_DERIV_COARSEY = 84, + DX_DERIV_FINEX = 85, + DX_DERIV_FINEY = 86, ++ DX_SAMPLE_INDEX = 90, + DX_COVERAGE = 91, + DX_THREAD_ID = 93, + DX_GROUP_ID = 94, +@@ -3827,6 +3828,11 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( + { + switch (sysval_semantic) + { ++ /* VSIR does not use an I/O register for SV_SampleIndex, but its ++ * signature element has a register index of UINT_MAX and it is ++ * convenient to return a valid register type here to handle it. 
*/ ++ case VKD3D_SHADER_SV_SAMPLE_INDEX: ++ return VKD3DSPR_NULL; + case VKD3D_SHADER_SV_COVERAGE: + return VKD3DSPR_COVERAGE; + case VKD3D_SHADER_SV_DEPTH: +@@ -3844,6 +3850,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) + { + enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; ++ enum vkd3d_shader_register_type io_reg_type; + bool is_patch_constant, is_control_point; + struct vkd3d_shader_dst_param *param; + const struct signature_element *e; +@@ -3876,9 +3883,10 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + + param = ¶ms[i]; + +- if (e->register_index == UINT_MAX) ++ if (e->register_index == UINT_MAX ++ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) + { +- dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic)); ++ dst_param_io_init(param, e, io_reg_type); + continue; + } + +@@ -3888,7 +3896,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade if (is_control_point) { if (reg_type == VKD3DSPR_OUTPUT) @@ -2339,7 +2522,7 @@ index 4a17c62292b..f9f44f34bcf 100644 param->reg.idx[count++].offset = 0; } -@@ -4161,8 +4161,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ +@@ -4161,8 +4169,7 @@ static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_ dst_param_init(&dst_params[0]); dst_params[1].reg = ptr->u.reg; @@ -2349,7 +2532,7 @@ index 4a17c62292b..f9f44f34bcf 100644 dst_params[1].reg.idx[1].rel_addr = NULL; dst_params[1].reg.idx[1].offset = ~0u; dst_params[1].reg.idx_count = 1; -@@ -4175,6 +4174,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty +@@ -4175,6 +4182,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty const struct 
sm6_type *type_b, struct sm6_parser *sm6) { bool is_int = sm6_type_is_bool_i16_i32_i64(type_a); @@ -2357,7 +2540,7 @@ index 4a17c62292b..f9f44f34bcf 100644 bool is_bool = sm6_type_is_bool(type_a); enum vkd3d_shader_opcode op; bool is_valid; -@@ -4199,7 +4199,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty +@@ -4199,7 +4207,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty case BINOP_ADD: case BINOP_SUB: /* NEG is applied later for subtraction. */ @@ -2366,7 +2549,7 @@ index 4a17c62292b..f9f44f34bcf 100644 is_valid = !is_bool; break; case BINOP_AND: -@@ -4215,7 +4215,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty +@@ -4215,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty is_valid = is_int && !is_bool; break; case BINOP_MUL: @@ -2375,7 +2558,7 @@ index 4a17c62292b..f9f44f34bcf 100644 is_valid = !is_bool; break; case BINOP_OR: -@@ -4223,7 +4223,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty +@@ -4223,7 +4231,7 @@ static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_ty is_valid = is_int; break; case BINOP_SDIV: @@ -2384,7 +2567,7 @@ index 4a17c62292b..f9f44f34bcf 100644 is_valid = !is_bool; break; case BINOP_SREM: -@@ -4865,8 +4865,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr +@@ -4865,8 +4873,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) return; src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); @@ -2396,7 +2579,7 @@ index 4a17c62292b..f9f44f34bcf 100644 type = sm6_type_get_scalar_type(dst->type, 0); VKD3D_ASSERT(type); -@@ -4965,8 +4967,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int +@@ -4965,8 +4975,7 @@ static void 
sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int dst->u.handle.d = d; reg = &dst->u.handle.reg; @@ -2406,7 +2589,42 @@ index 4a17c62292b..f9f44f34bcf 100644 reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = id; register_index_address_init(®->idx[1], operands[2], sm6); -@@ -5871,6 +5872,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr +@@ -5794,6 +5803,34 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ + instruction_dst_param_init_ssa_vector(ins, component_count, sm6); + } + ++static void sm6_parser_emit_dx_sample_index(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ const struct shader_signature *signature = &sm6->p.program->input_signature; ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ unsigned int element_idx; ++ ++ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); ++ ++ /* SV_SampleIndex is identified in VSIR by its signature element index, ++ * but the index is not supplied as a parameter to the DXIL intrinsic. 
*/ ++ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_SAMPLE_INDEX, 0, &element_idx)) ++ { ++ WARN("Sample index is not in the signature.\n"); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, ++ "Sample index signature element for a sample index operation is missing."); ++ return; ++ } ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param->reg = sm6->input_params[element_idx].reg; ++ src_param_init(src_param); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -5871,6 +5908,8 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr return; } e = &signature->elements[row_index]; @@ -2415,7 +2633,15 @@ index 4a17c62292b..f9f44f34bcf 100644 if (column_index >= VKD3D_VEC4_SIZE) { -@@ -6861,7 +6864,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re +@@ -6297,6 +6336,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, ++ [DX_SAMPLE_INDEX ] = {"i", "", sm6_parser_emit_dx_sample_index}, + [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, + [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, +@@ -6861,7 +6901,6 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re struct vkd3d_shader_dst_param *dst_params; struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr, *cmp, *new; @@ -2423,7 +2649,7 @@ index 4a17c62292b..f9f44f34bcf 100644 unsigned int i = 0; bool is_volatile; uint64_t code; -@@ -6887,9 +6889,10 
@@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re +@@ -6887,9 +6926,10 @@ static void sm6_parser_emit_cmpxchg(struct sm6_parser *sm6, const struct dxil_re return; } @@ -2437,7 +2663,7 @@ index 4a17c62292b..f9f44f34bcf 100644 if (!cmp || !new) return; -@@ -7287,7 +7290,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco +@@ -7287,7 +7327,6 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco unsigned int i = 0, alignment, operand_count; struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; @@ -2445,7 +2671,7 @@ index 4a17c62292b..f9f44f34bcf 100644 const struct sm6_value *ptr, *src; uint64_t alignment_code; -@@ -7299,13 +7301,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco +@@ -7299,13 +7338,14 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco return; } @@ -2463,7 +2689,15 @@ index 4a17c62292b..f9f44f34bcf 100644 { WARN("Type mismatch.\n"); vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_TYPE_MISMATCH, -@@ -8908,7 +8911,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, +@@ -8510,6 +8550,7 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = + [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, + [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, + [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, ++ [SEMANTIC_KIND_SAMPLEINDEX] = VKD3D_SHADER_SV_SAMPLE_INDEX, + [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, + [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, + [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, +@@ -8908,7 +8949,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, d->resource_type = ins->resource_type; d->kind = kind; d->reg_type = VKD3DSPR_RESOURCE; @@ -2472,7 +2706,7 @@ index 4a17c62292b..f9f44f34bcf 
100644 d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; -@@ -8982,7 +8985,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, +@@ -8982,7 +9023,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, d->resource_type = ins->resource_type; d->kind = values[0]; d->reg_type = VKD3DSPR_UAV; @@ -2481,7 +2715,7 @@ index 4a17c62292b..f9f44f34bcf 100644 d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; -@@ -9346,7 +9349,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9346,7 +9387,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Signature element is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element is not a metadata node."); @@ -2490,7 +2724,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } element_node = m->u.node; -@@ -9355,7 +9358,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9355,7 +9396,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Invalid operand count %u.\n", element_node->operand_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Invalid signature element operand count %u.", element_node->operand_count); @@ -2499,7 +2733,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } if (element_node->operand_count > 11) { -@@ -9374,7 +9377,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9374,7 +9415,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Failed to load uint value at index %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element value at index %u is not an 
integer.", j); @@ -2508,7 +2742,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } } -@@ -9385,7 +9388,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9385,7 +9426,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const FIXME("Unsupported element id %u not equal to its index %u.\n", values[0], i); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A non-sequential and non-zero-based element id is not supported."); @@ -2517,7 +2751,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } if (!sm6_metadata_value_is_string(element_node->operands[1])) -@@ -9393,7 +9396,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9393,7 +9434,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Element name is not a string.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element name is not a metadata string."); @@ -2526,7 +2760,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } e->semantic_name = element_node->operands[1]->u.string_value; -@@ -9407,7 +9410,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9407,7 +9448,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled.", j); @@ -2535,7 +2769,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } if ((e->interpolation_mode = values[5]) >= VKD3DSIM_COUNT) -@@ -9415,7 +9418,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9415,7 +9456,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled interpolation mode %u.\n", e->interpolation_mode); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, 
"Interpolation mode %u is unhandled.", e->interpolation_mode); @@ -2544,7 +2778,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } e->register_count = values[6]; -@@ -9430,7 +9433,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9430,7 +9471,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Unhandled I/O register semantic kind %u.\n", j); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "DXIL semantic kind %u is unhandled for an I/O register.", j); @@ -2553,7 +2787,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } } else if (e->register_index > MAX_REG_OUTPUT || e->register_count > MAX_REG_OUTPUT - e->register_index) -@@ -9439,7 +9442,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9439,7 +9480,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting row of %u with count %u is invalid.", e->register_index, e->register_count); @@ -2562,7 +2796,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } index = values[9]; -@@ -9448,7 +9451,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9448,7 +9489,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Invalid column start %u with count %u.\n", index, column_count); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "A signature element starting column %u with count %u is invalid.", index, column_count); @@ -2571,7 +2805,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } e->mask = vkd3d_write_mask_from_component_count(column_count); -@@ -9471,7 +9474,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9471,7 +9512,7 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const WARN("Semantic 
index list is not a node.\n"); vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, "Signature element semantic index list is not a metadata node."); @@ -2580,7 +2814,7 @@ index 4a17c62292b..f9f44f34bcf 100644 } element_node = m->u.node; -@@ -9516,6 +9519,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const +@@ -9516,6 +9557,10 @@ static enum vkd3d_result sm6_parser_read_signature(struct sm6_parser *sm6, const s->element_count = operand_count; return VKD3D_OK; @@ -2591,7 +2825,33 @@ index 4a17c62292b..f9f44f34bcf 100644 } static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, const struct sm6_metadata_value *m, -@@ -9633,6 +9640,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co +@@ -9557,7 +9602,7 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons + + static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) + { +- enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; ++ enum vsir_global_flags global_flags, mask, rotated_flags; + struct vkd3d_shader_instruction *ins; + + if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) +@@ -9567,7 +9612,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm + "Global flags metadata value is not an integer."); + return; + } +- /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. */ ++ /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vsir_global_flags. 
*/ + mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; + rotated_flags = global_flags & mask; + rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); +@@ -9575,6 +9620,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); + ins->declaration.global_flags = global_flags; ++ sm6->p.program->global_flags = global_flags; + } + + static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) +@@ -9633,6 +9679,7 @@ static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, co ins->declaration.thread_group_size.x = group_sizes[0]; ins->declaration.thread_group_size.y = group_sizes[1]; ins->declaration.thread_group_size.z = group_sizes[2]; @@ -2599,13 +2859,51 @@ index 4a17c62292b..f9f44f34bcf 100644 return VKD3D_OK; } -@@ -10303,12 +10311,28 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -9670,12 +9717,13 @@ static void sm6_parser_emit_dcl_tessellator_domain(struct sm6_parser *sm6, + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TESSELLATOR_DOMAIN); + ins->declaration.tessellator_domain = tessellator_domain; ++ sm6->p.program->tess_domain = tessellator_domain; + } + +-static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, unsigned int count, +- const char *type) ++static void sm6_parser_validate_control_point_count(struct sm6_parser *sm6, ++ unsigned int count, bool allow_zero, const char *type) + { +- if (!count || count > 32) ++ if ((!count && !allow_zero) || count > 32) + { + WARN("%s control point count %u invalid.\n", type, count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_PROPERTIES, +@@ -9904,7 +9952,7 @@ static enum vkd3d_tessellator_domain sm6_parser_ds_properties_init(struct sm6_pa + } + + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[0]); +- 
sm6_parser_validate_control_point_count(sm6, operands[1], "Domain shader input"); ++ sm6_parser_validate_control_point_count(sm6, operands[1], true, "Domain shader input"); + sm6->p.program->input_control_point_count = operands[1]; + + return operands[0]; +@@ -9963,9 +10011,9 @@ static enum vkd3d_tessellator_domain sm6_parser_hs_properties_init(struct sm6_pa + } + } + +- sm6_parser_validate_control_point_count(sm6, operands[1], "Hull shader input"); ++ sm6_parser_validate_control_point_count(sm6, operands[1], false, "Hull shader input"); + program->input_control_point_count = operands[1]; +- sm6_parser_validate_control_point_count(sm6, operands[2], "Hull shader output"); ++ sm6_parser_validate_control_point_count(sm6, operands[2], false, "Hull shader output"); + sm6_parser_emit_dcl_count(sm6, VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT, operands[2]); + program->output_control_point_count = operands[2]; + sm6_parser_emit_dcl_tessellator_domain(sm6, operands[3]); +@@ -10303,12 +10351,28 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. 
*/ count = max(token_count, 400) - 400; - if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) + if (!vsir_program_init(program, compile_info, &version, -+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) ++ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; @@ -2629,7 +2927,7 @@ index 4a17c62292b..f9f44f34bcf 100644 input_signature = &program->input_signature; output_signature = &program->output_signature; patch_constant_signature = &program->patch_constant_signature; -@@ -10526,9 +10550,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro +@@ -10526,9 +10590,16 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro dxil_block_destroy(&sm6->root_block); @@ -2646,7 +2944,7 @@ index 4a17c62292b..f9f44f34bcf 100644 vsir_program_cleanup(program); return ret; } -@@ -10570,18 +10601,25 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co +@@ -10570,18 +10641,10 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); @@ -2658,30 +2956,18 @@ index 4a17c62292b..f9f44f34bcf 100644 - - sm6_parser_cleanup(&sm6); if (ret < 0) - { - WARN("Failed to parse shader.\n"); +- { +- WARN("Failed to parse shader.\n"); return ret; - } +- } - return ret; -+ if ((ret = vkd3d_shader_parser_validate(&sm6.p, config_flags)) < 0) -+ { -+ WARN("Failed to validate shader after parsing, ret %d.\n", ret); -+ -+ if (TRACE_ON()) -+ vsir_program_trace(program); -+ -+ sm6_parser_cleanup(&sm6); -+ vsir_program_cleanup(program); -+ return ret; -+ } -+ + sm6_parser_cleanup(&sm6); + + return VKD3D_OK; } diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 
a1d1fd6572f..f3f7a2c765c 100644 +index a1d1fd6572f..cb42551ee8b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -25,6 +25,17 @@ static inline size_t put_u32_unaligned(struct vkd3d_bytecode_buffer *buffer, uin @@ -2797,17 +3083,13 @@ index a1d1fd6572f..f3f7a2c765c 100644 /* TODO: assignments */ if (var->state_block_count && var->state_blocks[0]->count) -@@ -459,25 +481,48 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) +@@ -459,25 +481,93 @@ static uint32_t get_fx_4_type_size(const struct hlsl_type *type) return type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float) * elements_count; } -static const uint32_t fx_4_numeric_base_type[] = +enum fx_4_type_constants - { -- [HLSL_TYPE_FLOAT] = 1, -- [HLSL_TYPE_INT ] = 2, -- [HLSL_TYPE_UINT ] = 3, -- [HLSL_TYPE_BOOL ] = 4, ++{ + /* Numeric types encoding */ + FX_4_NUMERIC_TYPE_FLOAT = 1, + FX_4_NUMERIC_TYPE_INT = 2, @@ -2824,16 +3106,65 @@ index a1d1fd6572f..f3f7a2c765c 100644 + FX_4_NUMERIC_COLUMN_MAJOR_MASK = 0x4000, + + /* Object types */ -+ FX_4_OBJECT_TYPE_STRING = 1, ++ FX_4_OBJECT_TYPE_STRING = 0x1, ++ FX_4_OBJECT_TYPE_BLEND_STATE = 0x2, ++ FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE = 0x3, ++ FX_4_OBJECT_TYPE_RASTERIZER_STATE = 0x4, ++ FX_4_OBJECT_TYPE_PIXEL_SHADER = 0x5, ++ FX_4_OBJECT_TYPE_VERTEX_SHADER = 0x6, ++ FX_4_OBJECT_TYPE_GEOMETRY_SHADER = 0x7, ++ FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO = 0x8, ++ ++ FX_4_OBJECT_TYPE_TEXTURE = 0x9, ++ FX_4_OBJECT_TYPE_TEXTURE_1D = 0xa, ++ FX_4_OBJECT_TYPE_TEXTURE_1DARRAY = 0xb, ++ FX_4_OBJECT_TYPE_TEXTURE_2D = 0xc, ++ FX_4_OBJECT_TYPE_TEXTURE_2DARRAY = 0xd, ++ FX_4_OBJECT_TYPE_TEXTURE_2DMS = 0xe, ++ FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf, ++ FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10, ++ FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11, ++ FX_4_OBJECT_TYPE_RTV = 0x13, ++ FX_4_OBJECT_TYPE_DSV = 0x14, ++ FX_4_OBJECT_TYPE_SAMPLER_STATE = 0x15, ++ FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17, ++ ++ FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b, ++ 
FX_5_OBJECT_TYPE_COMPUTE_SHADER = 0x1c, ++ FX_5_OBJECT_TYPE_HULL_SHADER = 0x1d, ++ FX_5_OBJECT_TYPE_DOMAIN_SHADER = 0x1e, ++ ++ FX_5_OBJECT_TYPE_UAV_1D = 0x1f, ++ FX_5_OBJECT_TYPE_UAV_1DARRAY = 0x20, ++ FX_5_OBJECT_TYPE_UAV_2D = 0x21, ++ FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22, ++ FX_5_OBJECT_TYPE_UAV_3D = 0x23, ++ FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24, ++ FX_5_OBJECT_TYPE_SRV_RAW_BUFFER = 0x25, ++ FX_5_OBJECT_TYPE_UAV_RAW_BUFFER = 0x26, ++ FX_5_OBJECT_TYPE_SRV_STRUCTURED_BUFFER = 0x27, ++ FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28, ++ FX_5_OBJECT_TYPE_SRV_APPEND_STRUCTURED_BUFFER = 0x2b, ++ FX_5_OBJECT_TYPE_SRV_CONSUME_STRUCTURED_BUFFER = 0x2c, + + /* Types */ + FX_4_TYPE_CLASS_NUMERIC = 1, + FX_4_TYPE_CLASS_OBJECT = 2, + FX_4_TYPE_CLASS_STRUCT = 3, ++ ++ /* Assignment types */ ++ FX_4_ASSIGNMENT_CONSTANT = 0x1, ++ FX_4_ASSIGNMENT_VARIABLE = 0x2, ++ FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, ++ FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, +}; + +static const uint32_t fx_4_numeric_base_types[] = -+{ + { +- [HLSL_TYPE_FLOAT] = 1, +- [HLSL_TYPE_INT ] = 2, +- [HLSL_TYPE_UINT ] = 3, +- [HLSL_TYPE_BOOL ] = 4, + [HLSL_TYPE_HALF ] = FX_4_NUMERIC_TYPE_FLOAT, + [HLSL_TYPE_FLOAT] = FX_4_NUMERIC_TYPE_FLOAT, + [HLSL_TYPE_INT ] = FX_4_NUMERIC_TYPE_INT, @@ -2858,7 +3189,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 }; struct hlsl_ctx *ctx = fx->ctx; uint32_t value = 0; -@@ -497,20 +542,21 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, +@@ -497,20 +587,21 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: @@ -2884,7 +3215,15 @@ index a1d1fd6572f..f3f7a2c765c 100644 return value; } -@@ -564,17 +610,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) +@@ -539,6 +630,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + [HLSL_SAMPLER_DIM_3D] = "RWTexture3D", + [HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer", + 
[HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", ++ [HLSL_SAMPLER_DIM_RAW_BUFFER] = "RWByteAddressBuffer", + }; + + switch (type->class) +@@ -564,17 +656,32 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_VERTEX_SHADER: return "VertexShader"; @@ -2917,7 +3256,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) { struct field_offsets -@@ -584,48 +645,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -584,48 +691,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co uint32_t offset; uint32_t type; }; @@ -2977,7 +3316,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 break; case HLSL_CLASS_DEPTH_STENCIL_STATE: -@@ -643,15 +702,16 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -643,48 +748,50 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_GEOMETRY_SHADER: case HLSL_CLASS_BLEND_STATE: case HLSL_CLASS_STRING: @@ -2996,7 +3335,8 @@ index a1d1fd6572f..f3f7a2c765c 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: -@@ -659,32 +719,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: vkd3d_unreachable(); case HLSL_CLASS_VOID: @@ -3040,7 +3380,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 { const struct field_offsets *field = &field_offsets[i]; -@@ -700,7 +760,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co +@@ -700,95 +807,96 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, 0); /* Interface count */ } } @@ -3049,8 +3389,26 @@ index a1d1fd6572f..f3f7a2c765c 100644 { static const uint32_t texture_type[] = { -@@ -716,13 +776,13 @@ static uint32_t 
write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, +- [HLSL_SAMPLER_DIM_GENERIC] = 9, +- [HLSL_SAMPLER_DIM_1D] = 10, +- [HLSL_SAMPLER_DIM_1DARRAY] = 11, +- [HLSL_SAMPLER_DIM_2D] = 12, +- [HLSL_SAMPLER_DIM_2DARRAY] = 13, +- [HLSL_SAMPLER_DIM_2DMS] = 14, +- [HLSL_SAMPLER_DIM_2DMSARRAY] = 15, +- [HLSL_SAMPLER_DIM_3D] = 16, +- [HLSL_SAMPLER_DIM_CUBE] = 17, +- [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, ++ [HLSL_SAMPLER_DIM_GENERIC] = FX_4_OBJECT_TYPE_TEXTURE, ++ [HLSL_SAMPLER_DIM_1D] = FX_4_OBJECT_TYPE_TEXTURE_1D, ++ [HLSL_SAMPLER_DIM_1DARRAY] = FX_4_OBJECT_TYPE_TEXTURE_1DARRAY, ++ [HLSL_SAMPLER_DIM_2D] = FX_4_OBJECT_TYPE_TEXTURE_2D, ++ [HLSL_SAMPLER_DIM_2DARRAY] = FX_4_OBJECT_TYPE_TEXTURE_2DARRAY, ++ [HLSL_SAMPLER_DIM_2DMS] = FX_4_OBJECT_TYPE_TEXTURE_2DMS, ++ [HLSL_SAMPLER_DIM_2DMSARRAY] = FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY, ++ [HLSL_SAMPLER_DIM_3D] = FX_4_OBJECT_TYPE_TEXTURE_3D, ++ [HLSL_SAMPLER_DIM_CUBE] = FX_4_OBJECT_TYPE_TEXTURE_CUBE, ++ [HLSL_SAMPLER_DIM_CUBEARRAY] = FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY, }; - put_u32_unaligned(buffer, texture_type[type->sampler_dim]); @@ -3059,15 +3417,29 @@ index a1d1fd6572f..f3f7a2c765c 100644 - else if (type->class == HLSL_CLASS_SAMPLER) + else if (element_type->class == HLSL_CLASS_SAMPLER) { - put_u32_unaligned(buffer, 21); +- put_u32_unaligned(buffer, 21); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_SAMPLER_STATE); } - else if (type->class == HLSL_CLASS_UAV) + else if (element_type->class == HLSL_CLASS_UAV) { static const uint32_t uav_type[] = { -@@ -735,60 +795,60 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, +- [HLSL_SAMPLER_DIM_1D] = 31, +- [HLSL_SAMPLER_DIM_1DARRAY] = 32, +- [HLSL_SAMPLER_DIM_2D] = 33, +- [HLSL_SAMPLER_DIM_2DARRAY] = 34, +- [HLSL_SAMPLER_DIM_3D] = 35, +- [HLSL_SAMPLER_DIM_BUFFER] = 36, +- [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, ++ [HLSL_SAMPLER_DIM_1D] = FX_5_OBJECT_TYPE_UAV_1D, 
++ [HLSL_SAMPLER_DIM_1DARRAY] = FX_5_OBJECT_TYPE_UAV_1DARRAY, ++ [HLSL_SAMPLER_DIM_2D] = FX_5_OBJECT_TYPE_UAV_2D, ++ [HLSL_SAMPLER_DIM_2DARRAY] = FX_5_OBJECT_TYPE_UAV_2DARRAY, ++ [HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D, ++ [HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER, ++ [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER, ++ [HLSL_SAMPLER_DIM_RAW_BUFFER] = FX_5_OBJECT_TYPE_UAV_RAW_BUFFER, }; - put_u32_unaligned(buffer, uav_type[type->sampler_dim]); @@ -3076,37 +3448,44 @@ index a1d1fd6572f..f3f7a2c765c 100644 - else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) + else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) { - put_u32_unaligned(buffer, 20); +- put_u32_unaligned(buffer, 20); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV); } - else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) + else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) { - put_u32_unaligned(buffer, 19); +- put_u32_unaligned(buffer, 19); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV); } - else if (type->class == HLSL_CLASS_PIXEL_SHADER) + else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) { - put_u32_unaligned(buffer, 5); +- put_u32_unaligned(buffer, 5); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_PIXEL_SHADER); } - else if (type->class == HLSL_CLASS_VERTEX_SHADER) + else if (element_type->class == HLSL_CLASS_VERTEX_SHADER) { - put_u32_unaligned(buffer, 6); +- put_u32_unaligned(buffer, 6); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_VERTEX_SHADER); } - else if (type->class == HLSL_CLASS_RASTERIZER_STATE) + else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) { - put_u32_unaligned(buffer, 4); +- put_u32_unaligned(buffer, 4); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RASTERIZER_STATE); } - else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) + else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) { - put_u32_unaligned(buffer, 3); +- put_u32_unaligned(buffer, 3); ++ 
put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE); } - else if (type->class == HLSL_CLASS_BLEND_STATE) + else if (element_type->class == HLSL_CLASS_BLEND_STATE) { - put_u32_unaligned(buffer, 2); +- put_u32_unaligned(buffer, 2); ++ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_BLEND_STATE); } - else if (type->class == HLSL_CLASS_STRING) + else if (element_type->class == HLSL_CLASS_STRING) @@ -3124,17 +3503,20 @@ index a1d1fd6572f..f3f7a2c765c 100644 - else if (type->class == HLSL_CLASS_COMPUTE_SHADER) + else if (element_type->class == HLSL_CLASS_COMPUTE_SHADER) { - put_u32_unaligned(buffer, 28); +- put_u32_unaligned(buffer, 28); ++ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_COMPUTE_SHADER); } - else if (type->class == HLSL_CLASS_HULL_SHADER) + else if (element_type->class == HLSL_CLASS_HULL_SHADER) { - put_u32_unaligned(buffer, 29); +- put_u32_unaligned(buffer, 29); ++ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_HULL_SHADER); } - else if (type->class == HLSL_CLASS_DOMAIN_SHADER) + else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER) { - put_u32_unaligned(buffer, 30); +- put_u32_unaligned(buffer, 30); ++ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_DOMAIN_SHADER); } else { @@ -3143,7 +3525,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); } -@@ -963,16 +1023,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n +@@ -963,16 +1071,16 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_context *fx) { @@ -3164,7 +3546,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) { -@@ -980,47 +1040,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex +@@ -980,47 +1088,128 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_contex ++count; } @@ -3318,7 +3700,7 @@ index 
a1d1fd6572f..f3f7a2c765c 100644 /* Note that struct fields must all be numeric; * this was validated in check_invalid_object_fields(). */ -@@ -1030,21 +1171,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f +@@ -1030,21 +1219,20 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: @@ -3350,7 +3732,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 break; } -@@ -1070,6 +1210,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1070,6 +1258,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type return is_type_supported_fx_2(ctx, type->e.array.type, loc); case HLSL_CLASS_TEXTURE: @@ -3358,7 +3740,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: -@@ -1083,9 +1224,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1083,9 +1272,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type } break; @@ -3371,7 +3753,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 case HLSL_CLASS_VERTEX_SHADER: hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); return false; -@@ -1104,6 +1246,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type +@@ -1104,10 +1294,12 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type return false; case HLSL_CLASS_EFFECT_GROUP: @@ -3379,7 +3761,12 @@ index a1d1fd6572f..f3f7a2c765c 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_CONSTANT_BUFFER: -@@ -1117,8 +1260,8 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + /* This cannot appear as an extern variable. 
*/ + break; + } +@@ -1117,8 +1309,8 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type static void write_fx_2_parameters(struct fx_write_context *fx) { @@ -3389,7 +3776,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_var *var; enum fx_2_parameter_flags -@@ -1138,23 +1281,35 @@ static void write_fx_2_parameters(struct fx_write_context *fx) +@@ -1138,23 +1330,35 @@ static void write_fx_2_parameters(struct fx_write_context *fx) if (var->storage_modifiers & HLSL_STORAGE_SHARED) flags |= IS_SHARED; @@ -3431,7 +3818,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 }; static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) -@@ -1180,19 +1335,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -1180,19 +1384,18 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) object_count = put_u32(structured, 0); write_fx_2_parameters(&fx); @@ -3458,7 +3845,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 size = align(fx.unstructured.size, 4); set_u32(&buffer, offset, size); -@@ -1201,6 +1355,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -1201,6 +1404,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); @@ -3466,7 +3853,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 if (!fx.technique_count) hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); -@@ -1252,6 +1407,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl +@@ -1252,6 +1456,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl switch (type->e.numeric.type) { case HLSL_TYPE_FLOAT: @@ -3474,16 +3861,38 @@ index a1d1fd6572f..f3f7a2c765c 100644 case HLSL_TYPE_INT: case HLSL_TYPE_UINT: case HLSL_TYPE_BOOL: -@@ -1420,7 +1576,7 @@ static 
uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: +@@ -1412,20 +1617,17 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s + + for (i = 0; i < count; ++i) + { +- if (hlsl_is_numeric_type(data_type)) ++ switch (data_type->e.numeric.type) + { +- switch (data_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- case HLSL_TYPE_BOOL: - type = fx_4_numeric_base_type[data_type->e.numeric.type]; -+ type = fx_4_numeric_base_types[data_type->e.numeric.type]; - break; - default: - type = 0; -@@ -1438,11 +1594,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s +- break; +- default: +- type = 0; +- hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); +- } ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ type = fx_4_numeric_base_types[data_type->e.numeric.type]; ++ break; ++ default: ++ type = 0; ++ hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); + } + + put_u32_unaligned(buffer, type); +@@ -1438,11 +1640,14 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, struct fx_write_context *fx) { @@ -3501,7 +3910,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 put_u32(buffer, entry->name_id); put_u32(buffer, entry->lhs_index); -@@ -1453,7 +1612,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl +@@ -1453,21 +1658,77 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl { case HLSL_IR_CONSTANT: { @@ -3509,8 +3918,9 @@ index a1d1fd6572f..f3f7a2c765c 100644 + c = hlsl_ir_constant(value); value_offset = 
write_fx_4_state_numeric_value(c, fx); - assignment_type = 1; -@@ -1461,15 +1620,71 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl +- assignment_type = 1; ++ assignment_type = FX_4_ASSIGNMENT_CONSTANT; + break; } case HLSL_IR_LOAD: { @@ -3522,10 +3932,11 @@ index a1d1fd6572f..f3f7a2c765c 100644 hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); - value_offset = write_fx_4_string(l->src.var->name, fx); +- assignment_type = 2; + value_offset = write_fx_4_string(load->src.var->name, fx); - assignment_type = 2; - break; - } ++ assignment_type = FX_4_ASSIGNMENT_VARIABLE; ++ break; ++ } + case HLSL_IR_INDEX: + { + struct hlsl_ir_index *index = hlsl_ir_index(value); @@ -3550,7 +3961,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 + c = hlsl_ir_constant(idx); + value_offset = put_u32(unstructured, value_offset); + put_u32(unstructured, c->value.u[0].u); -+ assignment_type = 3; ++ assignment_type = FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX; + + if (c->value.u[0].u >= type->e.array.elements_count) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, @@ -3571,7 +3982,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 + + value_offset = put_u32(unstructured, value_offset); + put_u32(unstructured, offset); -+ assignment_type = 4; ++ assignment_type = FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX; + break; + } + } @@ -3580,12 +3991,501 @@ index a1d1fd6572f..f3f7a2c765c 100644 + default: + hlsl_fixme(ctx, &var->loc, "Complex array index expressions in RHS values are not implemented."); + } -+ break; -+ } + break; + } default: - hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); +@@ -1575,6 +1836,7 @@ enum state_property_component_type + FX_BLEND, + FX_VERTEXSHADER, + FX_PIXELSHADER, ++ FX_COMPONENT_TYPE_COUNT, + }; + + static inline bool is_object_fx_type(enum state_property_component_type type) +@@ -1645,230 +1907,227 @@ static inline enum hlsl_base_type hlsl_type_from_fx_type(enum 
state_property_com + } + } + +-static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, +- struct fx_write_context *fx) +-{ +- static const struct rhs_named_value filter_values[] = +- { +- { "MIN_MAG_MIP_POINT", 0x00 }, +- { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, +- { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, +- { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, +- { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, +- { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, +- { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, +- { "MIN_MAG_MIP_LINEAR", 0x15 }, +- { "ANISOTROPIC", 0x55 }, +- { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, +- { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, +- { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, +- { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, +- { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, +- { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, +- { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, +- { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, +- { "COMPARISON_ANISOTROPIC", 0xd5 }, +- { NULL }, +- }; +- +- static const struct rhs_named_value address_values[] = +- { +- { "WRAP", 1 }, +- { "MIRROR", 2 }, +- { "CLAMP", 3 }, +- { "BORDER", 4 }, +- { "MIRROR_ONCE", 5 }, +- { NULL }, +- }; ++static const struct rhs_named_value filter_values[] = ++{ ++ { "MIN_MAG_MIP_POINT", 0x00 }, ++ { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, ++ { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, ++ { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, ++ { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, ++ { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, ++ { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, ++ { "MIN_MAG_MIP_LINEAR", 0x15 }, ++ { "ANISOTROPIC", 0x55 }, ++ { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, ++ { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, ++ { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, ++ { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, ++ { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, ++ { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, ++ { 
"COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, ++ { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, ++ { "COMPARISON_ANISOTROPIC", 0xd5 }, ++ { NULL }, ++}; + +- static const struct rhs_named_value compare_func_values[] = +- { +- { "NEVER", 1 }, +- { "LESS", 2 }, +- { "EQUAL", 3 }, +- { "LESS_EQUAL", 4 }, +- { "GREATER", 5 }, +- { "NOT_EQUAL", 6 }, +- { "GREATER_EQUAL", 7 }, +- { "ALWAYS", 8 }, +- { NULL } +- }; ++static const struct rhs_named_value address_values[] = ++{ ++ { "WRAP", 1 }, ++ { "MIRROR", 2 }, ++ { "CLAMP", 3 }, ++ { "BORDER", 4 }, ++ { "MIRROR_ONCE", 5 }, ++ { NULL }, ++}; + +- static const struct rhs_named_value depth_write_mask_values[] = +- { +- { "ZERO", 0 }, +- { "ALL", 1 }, +- { NULL } +- }; ++static const struct rhs_named_value compare_func_values[] = ++{ ++ { "NEVER", 1 }, ++ { "LESS", 2 }, ++ { "EQUAL", 3 }, ++ { "LESS_EQUAL", 4 }, ++ { "GREATER", 5 }, ++ { "NOT_EQUAL", 6 }, ++ { "GREATER_EQUAL", 7 }, ++ { "ALWAYS", 8 }, ++ { NULL } ++}; + +- static const struct rhs_named_value comparison_values[] = +- { +- { "NEVER", 1 }, +- { "LESS", 2 }, +- { "EQUAL", 3 }, +- { "LESS_EQUAL", 4 }, +- { "GREATER", 5 }, +- { "NOT_EQUAL", 6 }, +- { "GREATER_EQUAL", 7 }, +- { "ALWAYS", 8 }, +- { NULL } +- }; ++static const struct rhs_named_value depth_write_mask_values[] = ++{ ++ { "ZERO", 0 }, ++ { "ALL", 1 }, ++ { NULL } ++}; + +- static const struct rhs_named_value stencil_op_values[] = +- { +- { "KEEP", 1 }, +- { "ZERO", 2 }, +- { "REPLACE", 3 }, +- { "INCR_SAT", 4 }, +- { "DECR_SAT", 5 }, +- { "INVERT", 6 }, +- { "INCR", 7 }, +- { "DECR", 8 }, +- { NULL } +- }; ++static const struct rhs_named_value comparison_values[] = ++{ ++ { "NEVER", 1 }, ++ { "LESS", 2 }, ++ { "EQUAL", 3 }, ++ { "LESS_EQUAL", 4 }, ++ { "GREATER", 5 }, ++ { "NOT_EQUAL", 6 }, ++ { "GREATER_EQUAL", 7 }, ++ { "ALWAYS", 8 }, ++ { NULL } ++}; + +- static const struct rhs_named_value fill_values[] = +- { +- { "WIREFRAME", 2 }, +- { "SOLID", 3 }, +- { NULL } +- }; ++static const struct 
rhs_named_value stencil_op_values[] = ++{ ++ { "KEEP", 1 }, ++ { "ZERO", 2 }, ++ { "REPLACE", 3 }, ++ { "INCR_SAT", 4 }, ++ { "DECR_SAT", 5 }, ++ { "INVERT", 6 }, ++ { "INCR", 7 }, ++ { "DECR", 8 }, ++ { NULL } ++}; + +- static const struct rhs_named_value cull_values[] = +- { +- { "NONE", 1 }, +- { "FRONT", 2 }, +- { "BACK", 3 }, +- { NULL } +- }; ++static const struct rhs_named_value fill_values[] = ++{ ++ { "WIREFRAME", 2 }, ++ { "SOLID", 3 }, ++ { NULL } ++}; + +- static const struct rhs_named_value blend_values[] = +- { +- { "ZERO", 1 }, +- { "ONE", 2 }, +- { "SRC_COLOR", 3 }, +- { "INV_SRC_COLOR", 4 }, +- { "SRC_ALPHA", 5 }, +- { "INV_SRC_ALPHA", 6 }, +- { "DEST_ALPHA", 7 }, +- { "INV_DEST_ALPHA", 8 }, +- { "DEST_COLOR", 9 }, +- { "INV_DEST_COLOR", 10 }, +- { "SRC_ALPHA_SAT", 11 }, +- { "BLEND_FACTOR", 14 }, +- { "INV_BLEND_FACTOR", 15 }, +- { "SRC1_COLOR", 16 }, +- { "INV_SRC1_COLOR", 17 }, +- { "SRC1_ALPHA", 18 }, +- { "INV_SRC1_ALPHA", 19 }, +- { NULL } +- }; ++static const struct rhs_named_value cull_values[] = ++{ ++ { "NONE", 1 }, ++ { "FRONT", 2 }, ++ { "BACK", 3 }, ++ { NULL } ++}; + +- static const struct rhs_named_value blendop_values[] = +- { +- { "ADD", 1 }, +- { "SUBTRACT", 2 }, +- { "REV_SUBTRACT", 3 }, +- { "MIN", 4 }, +- { "MAX", 5 }, +- { NULL } +- }; ++static const struct rhs_named_value blend_values[] = ++{ ++ { "ZERO", 1 }, ++ { "ONE", 2 }, ++ { "SRC_COLOR", 3 }, ++ { "INV_SRC_COLOR", 4 }, ++ { "SRC_ALPHA", 5 }, ++ { "INV_SRC_ALPHA", 6 }, ++ { "DEST_ALPHA", 7 }, ++ { "INV_DEST_ALPHA", 8 }, ++ { "DEST_COLOR", 9 }, ++ { "INV_DEST_COLOR", 10 }, ++ { "SRC_ALPHA_SAT", 11 }, ++ { "BLEND_FACTOR", 14 }, ++ { "INV_BLEND_FACTOR", 15 }, ++ { "SRC1_COLOR", 16 }, ++ { "INV_SRC1_COLOR", 17 }, ++ { "SRC1_ALPHA", 18 }, ++ { "INV_SRC1_ALPHA", 19 }, ++ { NULL } ++}; + +- static const struct rhs_named_value bool_values[] = +- { +- { "FALSE", 0 }, +- { "TRUE", 1 }, +- { NULL } +- }; ++static const struct rhs_named_value blendop_values[] = ++{ ++ { "ADD", 1 }, 
++ { "SUBTRACT", 2 }, ++ { "REV_SUBTRACT", 3 }, ++ { "MIN", 4 }, ++ { "MAX", 5 }, ++ { NULL } ++}; + +- static const struct rhs_named_value null_values[] = +- { +- { "NULL", 0 }, +- { NULL } +- }; ++static const struct rhs_named_value bool_values[] = ++{ ++ { "FALSE", 0 }, ++ { "TRUE", 1 }, ++ { NULL } ++}; + +- static const struct state +- { +- const char *name; +- enum hlsl_type_class container; +- enum hlsl_type_class class; +- enum state_property_component_type type; +- unsigned int dimx; +- unsigned int array_size; +- uint32_t id; +- const struct rhs_named_value *values; +- } +- states[] = +- { +- { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, +- { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, +- { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, +- { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, +- { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, +- +- { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, +- { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, +- { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, +- { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, +- { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, +- +- { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, +- { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, +- { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, +- { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, +- { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, +- { "SlopeScaledDepthBias", 
HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, +- { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, +- { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, +- { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, +- { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, +- +- { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, +- { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, +- { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, +- { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, +- { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, +- { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, +- { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, +- { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, +- { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, +- { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, +- { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, +- { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, +- { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, +- { 
"BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, +- +- { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, +- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, +- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, +- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, +- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, +- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, +- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, +- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, +- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, +- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, +- { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, +- +- { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, +- { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, +- { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, +- }; ++static const struct rhs_named_value null_values[] = ++{ ++ { "NULL", 0 }, ++ { NULL } ++}; + +- static const struct state fx_4_blend_states[] = +- { +- { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, +- { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, +- { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, +- { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, +- { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, +- { 
"SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, +- { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, +- { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, +- { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, +- }; ++static const struct fx_4_state ++{ ++ const char *name; ++ enum hlsl_type_class container; ++ enum hlsl_type_class class; ++ enum state_property_component_type type; ++ unsigned int dimx; ++ unsigned int array_size; ++ int id; ++ const struct rhs_named_value *values; ++} ++fx_4_states[] = ++{ ++ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, ++ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, ++ { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, ++ { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, ++ { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, ++ ++ { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, ++ { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, ++ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, ++ { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, ++ { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, ++ ++ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, ++ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, ++ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, ++ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, ++ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, 
HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, ++ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, ++ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, ++ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, ++ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, ++ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, ++ ++ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, ++ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, ++ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, ++ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, ++ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, ++ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, ++ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, ++ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, ++ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, ++ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, ++ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, ++ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, ++ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, ++ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, ++ ++ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, ++ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, ++ { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, ++ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, ++ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, ++ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, ++ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, ++ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, ++ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, ++ ++ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, ++ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, ++ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, ++ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, ++ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, ++ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, ++ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, ++ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, ++ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, ++ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, ++ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, 
FX_TEXTURE, 1, 1, 55, null_values }, ++ ++ { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, ++ { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, ++ { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, ++}; + +- static const struct state fx_5_blend_states[] = ++static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, ++ struct fx_write_context *fx) ++{ ++ static const struct fx_4_state fx_5_blend_states[] = + { + { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, + { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, +@@ -1883,36 +2142,28 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + + struct state_table + { +- const struct state *ptr; ++ const struct fx_4_state *ptr; + unsigned int count; + } table; + + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct replace_state_context replace_context; ++ const struct fx_4_state *state = NULL; + struct hlsl_type *state_type = NULL; + struct hlsl_ir_node *node, *cast; +- const struct state *state = NULL; + struct hlsl_ctx *ctx = fx->ctx; + enum hlsl_base_type base_type; + unsigned int i; + +- if (type->class == HLSL_CLASS_BLEND_STATE) ++ if (type->class == HLSL_CLASS_BLEND_STATE && ctx->profile->major_version == 5) + { +- if (ctx->profile->major_version == 4) +- { +- table.ptr = fx_4_blend_states; +- table.count = ARRAY_SIZE(fx_4_blend_states); +- } +- else +- { +- table.ptr = fx_5_blend_states; +- table.count = ARRAY_SIZE(fx_5_blend_states); +- } ++ table.ptr = fx_5_blend_states; ++ table.count = ARRAY_SIZE(fx_5_blend_states); } -@@ -2118,7 +2333,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, + else + { +- table.ptr = states; +- table.count = ARRAY_SIZE(states); ++ table.ptr = 
fx_4_states; ++ table.count = ARRAY_SIZE(fx_4_states); + } + + for (i = 0; i < table.count; ++i) +@@ -2118,7 +2369,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, const struct function_component *comp = &components[i]; unsigned int arg_index = (i + 1) % entry->args_count; block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, @@ -3594,7 +4494,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 } hlsl_free_state_block_entry(entry); -@@ -2126,7 +2341,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, +@@ -2126,7 +2377,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, } /* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState @@ -3603,7 +4503,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, unsigned int entry_index, struct fx_write_context *fx) { -@@ -2140,7 +2355,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * +@@ -2140,7 +2391,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * if (type->class != HLSL_CLASS_BLEND_STATE) return 1; @@ -3612,7 +4512,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 return 1; if (entry->lhs_has_index) return 1; -@@ -2164,7 +2379,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * +@@ -2164,7 +2415,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * for (i = 1; i < array_size; ++i) { block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, @@ -3621,7 +4521,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 } return array_size; -@@ -2401,6 +2616,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx +@@ -2401,6 +2652,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct 
fx_write_context *fx size = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -3631,7 +4531,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 if (var->buffer != b) continue; -@@ -2629,3 +2847,506 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) +@@ -2629,3 +2883,949 @@ int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_unreachable(); } } @@ -3642,6 +4542,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 + struct vkd3d_shader_message_context *message_context; + struct vkd3d_string_buffer buffer; + unsigned int indent; ++ unsigned int version; + struct + { + const uint8_t *ptr; @@ -3650,6 +4551,7 @@ index a1d1fd6572f..f3f7a2c765c 100644 + } unstructured; + uint32_t buffer_count; + uint32_t object_count; ++ uint32_t group_count; + bool failed; +}; + @@ -3708,19 +4610,28 @@ index a1d1fd6572f..f3f7a2c765c 100644 + return -1; +} + -+static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) ++static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint32_t offset, size_t size) +{ + const uint8_t *ptr = parser->unstructured.ptr; + -+ memset(dst, 0, size); + if (offset >= parser->unstructured.size + || size > parser->unstructured.size - offset) + { + parser->failed = true; -+ return; ++ return NULL; + } + -+ ptr += offset; ++ return &ptr[offset]; ++} ++ ++static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) ++{ ++ const uint8_t *ptr; ++ ++ memset(dst, 0, size); ++ if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size))) ++ return; ++ + memcpy(dst, ptr, size); +} + @@ -3814,6 +4725,9 @@ index a1d1fd6572f..f3f7a2c765c 100644 + const char *name, *type_name; + uint32_t count, i, value; + ++ if (parser->failed) ++ return; ++ + count = fx_parser_read_u32(parser); + + if (!count) @@ -3873,7 +4787,6 @@ index a1d1fd6572f..f3f7a2c765c 100644 + 
vkd3d_string_buffer_printf(&parser->buffer, ">"); +} + -+ +static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) +{ + struct fx_4_numeric_variable @@ -3951,6 +4864,348 @@ index a1d1fd6572f..f3f7a2c765c 100644 + } +} + ++static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) ++{ ++ struct vkd3d_shader_compile_info info = { 0 }; ++ struct vkd3d_shader_code output; ++ uint32_t data_size, offset; ++ const void *data = NULL; ++ const char *p, *q, *end; ++ struct fx_5_shader ++ { ++ uint32_t offset; ++ uint32_t sodecl[4]; ++ uint32_t sodecl_count; ++ uint32_t rast_stream; ++ uint32_t iface_bindings_count; ++ uint32_t iface_bindings; ++ } shader5; ++ struct fx_4_gs_so ++ { ++ uint32_t offset; ++ uint32_t sodecl; ++ } gs_so; ++ int ret; ++ ++ static const struct vkd3d_shader_compile_option options[] = ++ { ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, ++ }; ++ ++ switch (object_type) ++ { ++ case FX_4_OBJECT_TYPE_PIXEL_SHADER: ++ case FX_4_OBJECT_TYPE_VERTEX_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: ++ offset = fx_parser_read_u32(parser); ++ break; ++ ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: ++ fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); ++ offset = gs_so.offset; ++ break; ++ ++ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: ++ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: ++ case FX_5_OBJECT_TYPE_HULL_SHADER: ++ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: ++ fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); ++ offset = shader5.offset; ++ break; ++ ++ default: ++ parser->failed = true; ++ return; ++ } ++ ++ fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); ++ if (data_size) ++ data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); ++ ++ if (!data) ++ return; ++ ++ info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; ++ info.source.code = data; ++ info.source.size = data_size; ++ info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; 
++ info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; ++ info.options = options; ++ info.option_count = ARRAY_SIZE(options); ++ info.log_level = VKD3D_SHADER_LOG_INFO; ++ ++ if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "Failed to disassemble shader blob.\n"); ++ return; ++ } ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); ++ ++ parse_fx_start_indent(parser); ++ ++ end = (const char *)output.code + output.size; ++ for (p = output.code; p < end; p = q) ++ { ++ if (!(q = memchr(p, '\n', end - p))) ++ q = end; ++ else ++ ++q; ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); ++ } ++ ++ parse_fx_end_indent(parser); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}"); ++ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", ++ fx_4_get_string(parser, gs_so.sodecl)); ++ } ++ else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) ++ { ++ for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) ++ { ++ if (shader5.sodecl[i]) ++ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", ++ i, fx_4_get_string(parser, shader5.sodecl[i])); ++ } ++ if (shader5.sodecl_count) ++ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); ++ } ++ ++ vkd3d_shader_free_shader_code(&output); ++} ++ ++static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) ++{ ++ switch (type->typeinfo) ++ { ++ case FX_4_OBJECT_TYPE_STRING: ++ case FX_4_OBJECT_TYPE_BLEND_STATE: ++ case FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE: ++ case FX_4_OBJECT_TYPE_RASTERIZER_STATE: ++ case FX_4_OBJECT_TYPE_SAMPLER_STATE: ++ case FX_4_OBJECT_TYPE_PIXEL_SHADER: ++ case 
FX_4_OBJECT_TYPE_VERTEX_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: ++ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: ++ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: ++ case FX_5_OBJECT_TYPE_HULL_SHADER: ++ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static int fx_4_state_id_compare(const void *a, const void *b) ++{ ++ const struct fx_4_state *state = b; ++ int id = *(int *)a; ++ ++ return id - state->id; ++} ++ ++static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32_t count, ++ enum hlsl_type_class type_class) ++{ ++ struct fx_4_assignment ++ { ++ uint32_t id; ++ uint32_t lhs_index; ++ uint32_t type; ++ uint32_t value; ++ } entry; ++ struct ++ { ++ uint32_t name; ++ uint32_t index; ++ } index; ++ struct ++ { ++ uint32_t type; ++ union ++ { ++ uint32_t u; ++ float f; ++ }; ++ } value; ++ static const char *value_types[FX_COMPONENT_TYPE_COUNT] = ++ { ++ [FX_BOOL] = "bool", ++ [FX_FLOAT] = "float", ++ [FX_UINT] = "uint", ++ [FX_UINT8] = "byte", ++ }; ++ const struct rhs_named_value *named_value; ++ uint32_t i, j, comp_count; ++ struct fx_4_state *state; ++ ++ for (i = 0; i < count; ++i) ++ { ++ fx_parser_read_u32s(parser, &entry, sizeof(entry)); ++ ++ if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), ++ sizeof(*fx_4_states), fx_4_state_id_compare))) ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); ++ break; ++ } ++ ++ if (state->container != type_class) ++ { ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, ++ "State '%s' does not belong to object type class %#x.", state->name, type_class); ++ break; ++ } ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s", state->name); ++ if (state->array_size > 1) ++ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry.lhs_index); ++ vkd3d_string_buffer_printf(&parser->buffer, " = 
"); ++ ++ switch (entry.type) ++ { ++ case FX_4_ASSIGNMENT_CONSTANT: ++ ++ if (value_types[state->type]) ++ vkd3d_string_buffer_printf(&parser->buffer, "%s", value_types[state->type]); ++ if (state->dimx > 1) ++ vkd3d_string_buffer_printf(&parser->buffer, "%u", state->dimx); ++ vkd3d_string_buffer_printf(&parser->buffer, "("); ++ ++ fx_parser_read_unstructured(parser, &comp_count, entry.value, sizeof(uint32_t)); ++ ++ named_value = NULL; ++ if (comp_count == 1 && state->values && (state->type == FX_UINT || state->type == FX_BOOL)) ++ { ++ const struct rhs_named_value *ptr = state->values; ++ ++ fx_parser_read_unstructured(parser, &value, entry.value + 4, sizeof(value)); ++ ++ while (ptr->name) ++ { ++ if (value.u == ptr->value) ++ { ++ named_value = ptr; ++ break; ++ } ++ ++ptr; ++ } ++ } ++ ++ if (named_value) ++ { ++ vkd3d_string_buffer_printf(&parser->buffer, "%s /* %u */", named_value->name, named_value->value); ++ } ++ else ++ { ++ uint32_t offset = entry.value + 4; ++ ++ for (j = 0; j < comp_count; ++j, offset += sizeof(value)) ++ { ++ fx_parser_read_unstructured(parser, &value, offset, sizeof(value)); ++ ++ if (state->type == FX_UINT8) ++ vkd3d_string_buffer_printf(&parser->buffer, "0x%.2x", value.u); ++ else if (state->type == FX_UINT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); ++ else if (state->type == FX_FLOAT) ++ vkd3d_string_buffer_printf(&parser->buffer, "%g", value.f); ++ ++ if (comp_count > 1 && j < comp_count - 1) ++ vkd3d_string_buffer_printf(&parser->buffer, ", "); ++ } ++ } ++ ++ vkd3d_string_buffer_printf(&parser->buffer, ")"); ++ ++ break; ++ case FX_4_ASSIGNMENT_VARIABLE: ++ vkd3d_string_buffer_printf(&parser->buffer, "%s", fx_4_get_string(parser, entry.value)); ++ break; ++ case FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX: ++ fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index)); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s[%u]", fx_4_get_string(parser, index.name), index.index); ++ break; ++ case 
FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX: ++ fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index)); ++ vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), ++ fx_4_get_string(parser, index.index)); ++ break; ++ default: ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, ++ "Unsupported assignment type %u.\n", entry.type); ++ } ++ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); ++ } ++} ++ ++static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct fx_4_binary_type *type) ++{ ++ static const enum hlsl_type_class type_classes[] = ++ { ++ [FX_4_OBJECT_TYPE_BLEND_STATE] = HLSL_CLASS_BLEND_STATE, ++ [FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE] = HLSL_CLASS_DEPTH_STENCIL_STATE, ++ [FX_4_OBJECT_TYPE_RASTERIZER_STATE] = HLSL_CLASS_RASTERIZER_STATE, ++ [FX_4_OBJECT_TYPE_SAMPLER_STATE] = HLSL_CLASS_SAMPLER, ++ }; ++ unsigned int i, element_count, count; ++ uint32_t value; ++ ++ if (!fx_4_object_has_initializer(type)) ++ return; ++ ++ vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); ++ element_count = max(type->element_count, 1); ++ for (i = 0; i < element_count; ++i) ++ { ++ switch (type->typeinfo) ++ { ++ case FX_4_OBJECT_TYPE_STRING: ++ vkd3d_string_buffer_printf(&parser->buffer, " "); ++ value = fx_parser_read_u32(parser); ++ fx_4_parse_string_initializer(parser, value); ++ break; ++ case FX_4_OBJECT_TYPE_BLEND_STATE: ++ case FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE: ++ case FX_4_OBJECT_TYPE_RASTERIZER_STATE: ++ case FX_4_OBJECT_TYPE_SAMPLER_STATE: ++ count = fx_parser_read_u32(parser); ++ ++ parse_fx_start_indent(parser); ++ fx_4_parse_state_object_initializer(parser, count, type_classes[type->typeinfo]); ++ parse_fx_end_indent(parser); ++ break; ++ case FX_4_OBJECT_TYPE_PIXEL_SHADER: ++ case FX_4_OBJECT_TYPE_VERTEX_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: ++ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: ++ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: ++ case 
FX_5_OBJECT_TYPE_COMPUTE_SHADER: ++ case FX_5_OBJECT_TYPE_HULL_SHADER: ++ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: ++ parse_fx_start_indent(parser); ++ fx_4_parse_shader_initializer(parser, type->typeinfo); ++ parse_fx_end_indent(parser); ++ break; ++ default: ++ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, ++ "Parsing object type %u is not implemented.", type->typeinfo); ++ return; ++ } ++ vkd3d_string_buffer_printf(&parser->buffer, ",\n"); ++ } ++ vkd3d_string_buffer_printf(&parser->buffer, "}"); ++} ++ +static void fx_4_parse_objects(struct fx_parser *parser) +{ + struct fx_4_object_variable @@ -3960,15 +5215,18 @@ index a1d1fd6572f..f3f7a2c765c 100644 + uint32_t semantic; + uint32_t bind_point; + } var; -+ uint32_t i, j, value, element_count; + struct fx_4_binary_type type; + const char *name, *type_name; ++ uint32_t i; + + if (parser->failed) + return; + + for (i = 0; i < parser->object_count; ++i) + { ++ if (parser->failed) ++ return; ++ + fx_parser_read_u32s(parser, &var, sizeof(var)); + fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); + @@ -3977,26 +5235,102 @@ index a1d1fd6572f..f3f7a2c765c 100644 + vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); + if (type.element_count) + vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -+ vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); + -+ element_count = max(type.element_count, 1); -+ for (j = 0; j < element_count; ++j) -+ { -+ switch (type.typeinfo) -+ { -+ case FX_4_OBJECT_TYPE_STRING: -+ vkd3d_string_buffer_printf(&parser->buffer, " "); -+ value = fx_parser_read_u32(parser); -+ fx_4_parse_string_initializer(parser, value); -+ break; -+ default: -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Parsing object type %u is not implemented.\n", type.typeinfo); -+ return; -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, ",\n"); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "};\n"); ++ 
fx_4_parse_object_initializer(parser, &type); ++ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); ++ ++ fx_parse_fx_4_annotations(parser); ++ } ++} ++ ++static void fx_parse_fx_4_technique(struct fx_parser *parser) ++{ ++ struct fx_technique ++ { ++ uint32_t name; ++ uint32_t count; ++ } technique; ++ struct fx_pass ++ { ++ uint32_t name; ++ uint32_t count; ++ } pass; ++ const char *name; ++ uint32_t i; ++ ++ if (parser->failed) ++ return; ++ ++ fx_parser_read_u32s(parser, &technique, sizeof(technique)); ++ ++ name = fx_4_get_string(parser, technique.name); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "technique%u %s", parser->version, name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); ++ ++ parse_fx_start_indent(parser); ++ for (i = 0; i < technique.count; ++i) ++ { ++ fx_parser_read_u32s(parser, &pass, sizeof(pass)); ++ name = fx_4_get_string(parser, pass.name); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n"); ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); ++ ++ parse_fx_start_indent(parser); ++ fx_4_parse_state_object_initializer(parser, pass.count, HLSL_CLASS_PASS); ++ parse_fx_end_indent(parser); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++ } ++ ++ parse_fx_end_indent(parser); ++ ++ parse_fx_print_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); ++} ++ ++static void fx_parse_groups(struct fx_parser *parser) ++{ ++ struct fx_group ++ { ++ uint32_t name; ++ uint32_t count; ++ } group; ++ const char *name; ++ uint32_t i, j; ++ ++ if (parser->failed) ++ return; ++ ++ for (i = 0; i < parser->group_count; ++i) ++ { ++ 
fx_parser_read_u32s(parser, &group, sizeof(group)); ++ ++ name = fx_4_get_string(parser, group.name); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "fxgroup %s", name); ++ fx_parse_fx_4_annotations(parser); ++ ++ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); ++ parse_fx_start_indent(parser); ++ ++ for (j = 0; j < group.count; ++j) ++ fx_parse_fx_4_technique(parser); ++ ++ parse_fx_end_indent(parser); ++ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); + } +} + @@ -4024,7 +5358,9 @@ index a1d1fd6572f..f3f7a2c765c 100644 + uint32_t shader_count; + uint32_t inline_shader_count; + } header; ++ uint32_t i; + ++ parser->version = 10; + fx_parser_read_u32s(parser, &header, sizeof(header)); + parser->buffer_count = header.buffer_count; + parser->object_count = header.object_count; @@ -4043,6 +5379,9 @@ index a1d1fd6572f..f3f7a2c765c 100644 + fx_parse_buffers(parser); + fx_4_parse_objects(parser); + ++ for (i = 0; i < header.technique_count; ++i) ++ fx_parse_fx_4_technique(parser); ++ + return parser->failed ? - 1 : 0; +} + @@ -4076,9 +5415,11 @@ index a1d1fd6572f..f3f7a2c765c 100644 + uint32_t class_instance_element_count; + } header; + ++ parser->version = 11; + fx_parser_read_u32s(parser, &header, sizeof(header)); + parser->buffer_count = header.buffer_count; + parser->object_count = header.object_count; ++ parser->group_count = header.group_count; + + if (parser->end - parser->ptr < header.unstructured_size) + { @@ -4094,6 +5435,8 @@ index a1d1fd6572f..f3f7a2c765c 100644 + fx_parse_buffers(parser); + fx_4_parse_objects(parser); + ++ fx_parse_groups(parser); ++ + return parser->failed ? 
- 1 : 0; +} + @@ -4139,24 +5482,30 @@ index a1d1fd6572f..f3f7a2c765c 100644 + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index d1f02ab568b..46515818d07 100644 +index d1f02ab568b..0df0e30f399 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -18,6 +18,13 @@ +@@ -18,6 +18,19 @@ #include "vkd3d_shader_private.h" +struct glsl_resource_type_info +{ ++ /* The number of coordinates needed to sample the resource type. */ + size_t coord_size; ++ /* Whether the resource type is an array type. */ ++ bool array; ++ /* Whether the resource type has a shadow/comparison variant. */ + bool shadow; ++ /* The type suffix for resource type. I.e., the "2D" part of "usampler2D" ++ * or "iimage2D". */ + const char *type_suffix; +}; + struct glsl_src { struct vkd3d_string_buffer *str; -@@ -38,9 +45,26 @@ struct vkd3d_glsl_generator +@@ -38,9 +51,26 @@ struct vkd3d_glsl_generator struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; unsigned int indent; @@ -4183,7 +5532,7 @@ index d1f02ab568b..46515818d07 100644 static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( struct vkd3d_glsl_generator *generator, enum vkd3d_shader_error error, const char *fmt, ...) 
-@@ -53,11 +77,110 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( +@@ -53,11 +83,110 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( generator->failed = true; } @@ -4212,17 +5561,17 @@ index d1f02ab568b..46515818d07 100644 +{ + static const struct glsl_resource_type_info info[] = + { -+ {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ -+ {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -+ {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -+ {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -+ {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -+ {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -+ {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -+ {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -+ {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -+ {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -+ {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ ++ {0, 0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ ++ {1, 0, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ ++ {1, 0, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ ++ {2, 0, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ ++ {2, 0, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ ++ {3, 0, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ ++ {3, 0, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ ++ {2, 1, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ ++ {3, 1, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ ++ {3, 1, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ ++ {4, 1, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ + }; + + if (!t || t >= ARRAY_SIZE(info)) @@ -4294,7 +5643,7 @@ index d1f02ab568b..46515818d07 100644 static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) { -@@ -67,6 +190,95 @@ static void 
shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, +@@ -67,6 +196,99 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); break; @@ -4382,6 +5731,10 @@ index d1f02ab568b..46515818d07 100644 + gen->prefix, reg->idx[0].offset, reg->idx[2].offset); + break; + ++ case VKD3DSPR_THREADID: ++ vkd3d_string_buffer_printf(buffer, "gl_GlobalInvocationID"); ++ break; ++ + case VKD3DSPR_IDXTEMP: + vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); + shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); @@ -4390,7 +5743,7 @@ index d1f02ab568b..46515818d07 100644 default: vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled register type %#x.", reg->type); -@@ -106,23 +318,118 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca +@@ -106,23 +328,118 @@ static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_ca vkd3d_string_buffer_release(cache, src->str); } @@ -4468,7 +5821,7 @@ index d1f02ab568b..46515818d07 100644 - "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); - shader_glsl_print_register_name(glsl_src->str, gen, reg); -+ if (reg->type == VKD3DSPR_IMMCONST) ++ if (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_THREADID) + src_data_type = VKD3D_DATA_UINT; + else + src_data_type = VKD3D_DATA_FLOAT; @@ -4517,7 +5870,7 @@ index d1f02ab568b..46515818d07 100644 } static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) -@@ -153,26 +460,89 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener +@@ -153,26 +470,89 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener return write_mask; } @@ -4616,7 +5969,7 @@ index d1f02ab568b..46515818d07 100644 } static void shader_glsl_unhandled(struct 
vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -@@ -183,138 +553,1671 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct +@@ -183,138 +563,1923 @@ static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } @@ -4904,16 +6257,37 @@ index d1f02ab568b..46515818d07 100644 + vkd3d_string_buffer_printf(gen->buffer, "default:\n"); +} + ++static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, ++ unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset) ++{ ++ switch (offset_size) ++ { ++ case 1: ++ vkd3d_string_buffer_printf(buffer, "%d", offset->u); ++ break; ++ case 2: ++ vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v); ++ break; ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Invalid texel offset size %u.", offset_size); ++ /* fall through */ ++ case 3: ++ vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", offset->u, offset->v, offset->w); ++ break; ++ } ++} ++ +static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ ++ unsigned int resource_id, resource_idx, resource_space, sample_count; + const struct glsl_resource_type_info *resource_type_info; -+ unsigned int resource_id, resource_idx, resource_space; + const struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_component_type sampled_type; + enum vkd3d_shader_resource_type resource_type; + struct vkd3d_string_buffer *fetch; + enum vkd3d_data_type data_type; -+ struct glsl_src coord, lod; ++ struct glsl_src coord; + struct glsl_dst dst; + uint32_t coord_mask; + @@ -4931,6 +6305,7 @@ index d1f02ab568b..46515818d07 100644 + { + resource_type = d->resource_type; + resource_space = d->register_space; ++ sample_count = d->sample_count; + sampled_type = 
vkd3d_component_type_from_resource_data_type(d->resource_data_type); + data_type = vkd3d_data_type_from_component_type(sampled_type); + } @@ -4940,6 +6315,7 @@ index d1f02ab568b..46515818d07 100644 + "Internal compiler error: Undeclared resource descriptor %u.", resource_id); + resource_space = 0; + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ sample_count = 1; + data_type = VKD3D_DATA_FLOAT; + } + @@ -4956,7 +6332,6 @@ index d1f02ab568b..46515818d07 100644 + + glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&coord, gen, &ins->src[0], coord_mask); -+ glsl_src_init(&lod, gen, &ins->src[0], VKD3DSP_WRITEMASK_3); + fetch = vkd3d_string_buffer_get(&gen->string_buffers); + + vkd3d_string_buffer_printf(fetch, "texelFetch("); @@ -4964,43 +6339,92 @@ index d1f02ab568b..46515818d07 100644 + resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0); + vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer); + if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER) -+ vkd3d_string_buffer_printf(fetch, ", %s", lod.str->buffer); ++ { ++ vkd3d_string_buffer_printf(fetch, ", "); ++ if (ins->opcode != VKD3DSIH_LD2DMS) ++ shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type); ++ else if (sample_count == 1) ++ /* If the resource isn't a true multisample resource, this is the ++ * "lod" parameter instead of the "sample" parameter. 
*/ ++ vkd3d_string_buffer_printf(fetch, "0"); ++ else ++ shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type); ++ } + vkd3d_string_buffer_printf(fetch, ")"); + shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask); + + shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, fetch); -+ glsl_src_cleanup(&lod, &gen->string_buffers); + glsl_src_cleanup(&coord, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + ++static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *coord, const struct vkd3d_shader_src_param *ref, unsigned int coord_size) ++{ ++ uint32_t coord_mask = vkd3d_write_mask_from_component_count(coord_size); ++ ++ switch (coord_size) ++ { ++ case 1: ++ vkd3d_string_buffer_printf(buffer, "vec3("); ++ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ", 0.0, "); ++ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ")"); ++ break; ++ ++ case 4: ++ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ", "); ++ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); ++ break; ++ ++ default: ++ vkd3d_string_buffer_printf(buffer, "vec%u(", coord_size + 1); ++ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ", "); ++ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); ++ vkd3d_string_buffer_printf(buffer, ")"); ++ break; ++ } ++} ++ +static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ ++ bool shadow_sampler, array, bias, 
dynamic_offset, gather, grad, lod, lod_zero, offset, shadow; + const struct glsl_resource_type_info *resource_type_info; ++ const struct vkd3d_shader_src_param *resource, *sampler; + unsigned int resource_id, resource_idx, resource_space; + unsigned int sampler_id, sampler_idx, sampler_space; + const struct vkd3d_shader_descriptor_info1 *d; + enum vkd3d_shader_component_type sampled_type; + enum vkd3d_shader_resource_type resource_type; ++ unsigned int component_idx, coord_size; + struct vkd3d_string_buffer *sample; + enum vkd3d_data_type data_type; -+ struct glsl_src coord; + struct glsl_dst dst; -+ uint32_t coord_mask; + -+ if (vkd3d_shader_instruction_has_texel_offset(ins)) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled texel sample offset."); ++ bias = ins->opcode == VKD3DSIH_SAMPLE_B; ++ dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO; ++ gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO; ++ grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; ++ lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; ++ offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins); ++ shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; + -+ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr -+ || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) ++ resource = &ins->src[1 + dynamic_offset]; ++ sampler = &ins->src[2 + dynamic_offset]; ++ ++ if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr ++ || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, + "Descriptor indexing is not supported."); + -+ resource_id = ins->src[1].reg.idx[0].offset; -+ resource_idx = ins->src[1].reg.idx[1].offset; ++ resource_id = resource->reg.idx[0].offset; ++ resource_idx 
= resource->reg.idx[1].offset; + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) + { + resource_type = d->resource_type; @@ -5019,20 +6443,36 @@ index d1f02ab568b..46515818d07 100644 + + if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) + { -+ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); ++ coord_size = resource_type_info->coord_size; ++ array = resource_type_info->array; + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x.", resource_type); -+ coord_mask = vkd3d_write_mask_from_component_count(2); ++ coord_size = 2; ++ array = false; + } + -+ sampler_id = ins->src[2].reg.idx[0].offset; -+ sampler_idx = ins->src[2].reg.idx[1].offset; ++ sampler_id = sampler->reg.idx[0].offset; ++ sampler_idx = sampler->reg.idx[1].offset; + if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) + { + sampler_space = d->register_space; ++ shadow_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; ++ ++ if (shadow) ++ { ++ if (!shadow_sampler) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); ++ } ++ else ++ { ++ if (shadow_sampler) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); ++ } + } + else + { @@ -5042,17 +6482,127 @@ index d1f02ab568b..46515818d07 100644 + } + + glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&coord, gen, &ins->src[0], coord_mask); + sample = vkd3d_string_buffer_get(&gen->string_buffers); + -+ vkd3d_string_buffer_printf(sample, "texture("); ++ if (gather) ++ vkd3d_string_buffer_printf(sample, "textureGather"); ++ else if (grad) ++ vkd3d_string_buffer_printf(sample, "textureGrad"); 
++ else if (lod) ++ vkd3d_string_buffer_printf(sample, "textureLod"); ++ else ++ vkd3d_string_buffer_printf(sample, "texture"); ++ vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : ""); + shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); -+ vkd3d_string_buffer_printf(sample, ", %s)", coord.str->buffer); -+ shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); ++ vkd3d_string_buffer_printf(sample, ", "); ++ if (shadow) ++ shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size); ++ else ++ shader_glsl_print_src(sample, gen, &ins->src[0], ++ vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type); ++ if (grad) ++ { ++ vkd3d_string_buffer_printf(sample, ", "); ++ shader_glsl_print_src(sample, gen, &ins->src[3], ++ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[3].reg.data_type); ++ vkd3d_string_buffer_printf(sample, ", "); ++ shader_glsl_print_src(sample, gen, &ins->src[4], ++ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[4].reg.data_type); ++ } ++ else if (lod_zero) ++ { ++ vkd3d_string_buffer_printf(sample, ", 0.0"); ++ } ++ else if (lod) ++ { ++ vkd3d_string_buffer_printf(sample, ", "); ++ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); ++ } ++ if (offset) ++ { ++ vkd3d_string_buffer_printf(sample, ", "); ++ if (dynamic_offset) ++ shader_glsl_print_src(sample, gen, &ins->src[1], ++ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type); ++ else ++ shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset); ++ } ++ if (bias) ++ { ++ vkd3d_string_buffer_printf(sample, ", "); ++ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); ++ } ++ else if (gather) ++ { ++ if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 
0))) ++ vkd3d_string_buffer_printf(sample, ", %d", component_idx); ++ } ++ vkd3d_string_buffer_printf(sample, ")"); ++ shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask); + + shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); + + vkd3d_string_buffer_release(&gen->string_buffers, sample); ++ glsl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void shader_glsl_load_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ const struct glsl_resource_type_info *resource_type_info; ++ enum vkd3d_shader_component_type component_type; ++ const struct vkd3d_shader_descriptor_info1 *d; ++ enum vkd3d_shader_resource_type resource_type; ++ unsigned int uav_id, uav_idx, uav_space; ++ struct vkd3d_string_buffer *load; ++ struct glsl_src coord; ++ struct glsl_dst dst; ++ uint32_t coord_mask; ++ ++ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, ++ "Descriptor indexing is not supported."); ++ ++ uav_id = ins->src[1].reg.idx[0].offset; ++ uav_idx = ins->src[1].reg.idx[1].offset; ++ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) ++ { ++ resource_type = d->resource_type; ++ uav_space = d->register_space; ++ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); ++ uav_space = 0; ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ } ++ ++ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) ++ { ++ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); ++ } ++ else ++ { ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler 
error: Unhandled UAV type %#x.", resource_type); ++ coord_mask = vkd3d_write_mask_from_component_count(2); ++ } ++ ++ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ glsl_src_init(&coord, gen, &ins->src[0], coord_mask); ++ load = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ vkd3d_string_buffer_printf(load, "imageLoad("); ++ shader_glsl_print_image_name(load, gen, uav_idx, uav_space); ++ vkd3d_string_buffer_printf(load, ", %s)", coord.str->buffer); ++ shader_glsl_print_swizzle(load, ins->src[1].swizzle, ins->dst[0].write_mask); ++ ++ shader_glsl_print_assignment_ext(gen, &dst, ++ vkd3d_data_type_from_component_type(component_type), "%s", load->buffer); ++ ++ vkd3d_string_buffer_release(&gen->string_buffers, load); + glsl_src_cleanup(&coord, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} @@ -5260,7 +6810,13 @@ index d1f02ab568b..46515818d07 100644 + "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type); + vkd3d_string_buffer_printf(buffer, + "uintBitsToFloat(uvec4(gl_FrontFacing ? 
0xffffffffu : 0u, 0u, 0u, 0u))"); ++ break; + ++ case VKD3D_SHADER_SV_SAMPLE_INDEX: ++ if (version->type != VKD3D_SHADER_TYPE_PIXEL) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type); ++ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))"); + break; + + case VKD3D_SHADER_SV_TARGET: @@ -5382,6 +6938,9 @@ index d1f02ab568b..46515818d07 100644 + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index); + break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index); ++ break; + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled output component type %#x.", e->component_type); @@ -5490,6 +7049,16 @@ index d1f02ab568b..46515818d07 100644 + case VKD3DSIH_FTOU: + shader_glsl_cast(gen, ins, "uint", "uvec"); + break; ++ case VKD3DSIH_GATHER4: ++ case VKD3DSIH_GATHER4_PO: ++ case VKD3DSIH_SAMPLE: ++ case VKD3DSIH_SAMPLE_B: ++ case VKD3DSIH_SAMPLE_C: ++ case VKD3DSIH_SAMPLE_C_LZ: ++ case VKD3DSIH_SAMPLE_GRAD: ++ case VKD3DSIH_SAMPLE_LOD: ++ shader_glsl_sample(gen, ins); ++ break; + case VKD3DSIH_GEO: + case VKD3DSIH_IGE: + shader_glsl_relop(gen, ins, ">=", "greaterThanEqual"); @@ -5507,9 +7076,11 @@ index d1f02ab568b..46515818d07 100644 + break; + case VKD3DSIH_IMAX: + case VKD3DSIH_MAX: ++ case VKD3DSIH_UMAX: + shader_glsl_intrinsic(gen, ins, "max"); + break; + case VKD3DSIH_MIN: ++ case VKD3DSIH_UMIN: + shader_glsl_intrinsic(gen, ins, "min"); + break; + case VKD3DSIH_IMUL: @@ -5534,8 +7105,12 @@ index d1f02ab568b..46515818d07 100644 + shader_glsl_cast(gen, ins, "float", "vec"); + break; + case VKD3DSIH_LD: ++ case VKD3DSIH_LD2DMS: + shader_glsl_ld(gen, ins); + break; ++ case VKD3DSIH_LD_UAV_TYPED: ++ 
shader_glsl_load_uav_typed(gen, ins); ++ break; + case VKD3DSIH_LOG: + shader_glsl_intrinsic(gen, ins, "log2"); + break; @@ -5575,9 +7150,6 @@ index d1f02ab568b..46515818d07 100644 + case VKD3DSIH_RSQ: + shader_glsl_intrinsic(gen, ins, "inversesqrt"); + break; -+ case VKD3DSIH_SAMPLE: -+ shader_glsl_sample(gen, ins); -+ break; + case VKD3DSIH_SQRT: + shader_glsl_intrinsic(gen, ins, "sqrt"); + break; @@ -5892,6 +7464,7 @@ index d1f02ab568b..46515818d07 100644 + struct vkd3d_string_buffer *buffer = gen->buffer; + enum vkd3d_shader_component_type component_type; + const char *sampler_type, *sampler_type_prefix; ++ enum vkd3d_shader_resource_type resource_type; + unsigned int binding_idx; + bool shadow = false; + @@ -5917,18 +7490,32 @@ index d1f02ab568b..46515818d07 100644 + return; + } + -+ if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type))) ++ resource_type = srv->resource_type; ++ if (srv->sample_count == 1) ++ { ++ /* The OpenGL API distinguishes between multi-sample textures with ++ * sample count 1 and single-sample textures. Direct3D and Vulkan ++ * don't make this distinction at the API level, but Direct3D shaders ++ * are capable of expressing both. We therefore map such multi-sample ++ * textures to their single-sample equivalents here. 
*/ ++ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; ++ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) ++ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; ++ } ++ ++ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) + { + sampler_type = resource_type_info->type_suffix; + if (shadow && !resource_type_info->shadow) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Comparison samplers are not supported with resource type %#x.", srv->resource_type); ++ "Comparison samplers are not supported with resource type %#x.", resource_type); + } + else + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled resource type %#x for combined resource/sampler " -+ "for resource %u, space %u and sampler %u, space %u.", srv->resource_type, ++ "for resource %u, space %u and sampler %u, space %u.", resource_type, + crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space); + sampler_type = ""; + } @@ -5953,7 +7540,7 @@ index d1f02ab568b..46515818d07 100644 + break; + } + -+ if (!shader_glsl_get_combined_sampler_binding(gen, crs, srv->resource_type, &binding_idx)) ++ if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx)) + { + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, + "No descriptor binding specified for combined resource/sampler " @@ -6194,6 +7781,9 @@ index d1f02ab568b..46515818d07 100644 + case VKD3D_SHADER_COMPONENT_UINT: + vkd3d_string_buffer_printf(buffer, "uvec4"); + break; ++ case VKD3D_SHADER_COMPONENT_INT: ++ vkd3d_string_buffer_printf(buffer, "ivec4"); ++ break; + case VKD3D_SHADER_COMPONENT_FLOAT: + vkd3d_string_buffer_printf(buffer, "vec4"); + break; @@ -6216,6 +7806,20 @@ index d1f02ab568b..46515818d07 100644 + } +} + ++static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer 
*buffer, ++ struct vkd3d_glsl_generator *gen, enum vsir_global_flags flags) ++{ ++ if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) ++ { ++ vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n"); ++ flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; ++ } ++ ++ if (flags) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags); ++} ++ +static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct vsir_program *program = gen->program; @@ -6229,6 +7833,8 @@ index d1f02ab568b..46515818d07 100644 + group_size->x, group_size->y, group_size->z); + } + ++ shader_glsl_handle_global_flags(buffer, gen, program->global_flags); ++ + shader_glsl_generate_descriptor_declarations(gen); + shader_glsl_generate_input_declarations(gen); + shader_glsl_generate_output_declarations(gen); @@ -6364,8 +7970,7 @@ index d1f02ab568b..46515818d07 100644 + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + -+ VKD3D_ASSERT(program->normalised_io); -+ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); - vkd3d_glsl_generator_init(&generator, program, message_context); + vkd3d_glsl_generator_init(&generator, program, compile_info, @@ -6374,10 +7979,10 @@ index d1f02ab568b..46515818d07 100644 vkd3d_glsl_generator_cleanup(&generator); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index bd5baacd83d..6ad0117fd5c 100644 +index bd5baacd83d..3be9ba9979b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -254,6 +254,46 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) +@@ -254,6 +254,47 @@ bool hlsl_type_is_resource(const struct hlsl_type *type) } } @@ -6414,6 +8019,7 @@ index bd5baacd83d..6ad0117fd5c 100644 + case HLSL_CLASS_UAV: + case 
HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_VOID: + case HLSL_CLASS_NULL: + return false; @@ -6424,7 +8030,7 @@ index bd5baacd83d..6ad0117fd5c 100644 /* Only intended to be used for derefs (after copies have been lowered to components or vectors) or * resources, since for both their data types span across a single regset. */ static enum hlsl_regset type_get_regset(const struct hlsl_type *type) -@@ -379,6 +419,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type +@@ -379,6 +420,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -6432,7 +8038,15 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: -@@ -455,6 +496,7 @@ static bool type_is_single_component(const struct hlsl_type *type) +@@ -393,6 +435,7 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -455,6 +498,7 @@ static bool type_is_single_component(const struct hlsl_type *type) { case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: @@ -6440,7 +8054,15 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_SCALAR: case HLSL_CLASS_SAMPLER: -@@ -631,6 +673,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty +@@ -483,6 +527,7 @@ static bool type_is_single_component(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_STREAM_OUTPUT: + break; + } + vkd3d_unreachable(); +@@ -631,12 +676,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx 
*ctx, struct hlsl_ty break; case HLSL_CLASS_EFFECT_GROUP: @@ -6448,7 +8070,37 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: -@@ -930,6 +973,7 @@ static const char * get_case_insensitive_typename(const char *name) + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: ++ case HLSL_CLASS_STREAM_OUTPUT: + vkd3d_unreachable(); + } + type = next_type; +@@ -855,6 +902,22 @@ struct hlsl_type *hlsl_new_array_type(struct hlsl_ctx *ctx, struct hlsl_type *ba + return type; + } + ++struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, ++ enum hlsl_so_object_type so_type, struct hlsl_type *data_type) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) ++ return NULL; ++ type->class = HLSL_CLASS_STREAM_OUTPUT; ++ type->e.so.so_type = so_type; ++ type->e.so.type = data_type; ++ ++ list_add_tail(&ctx->types, &type->entry); ++ ++ return type; ++} ++ + struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, + struct hlsl_struct_field *fields, size_t field_count) + { +@@ -930,6 +993,7 @@ static const char * get_case_insensitive_typename(const char *name) { "dword", "float", @@ -6456,7 +8108,7 @@ index bd5baacd83d..6ad0117fd5c 100644 "matrix", "pixelshader", "texture", -@@ -1021,6 +1065,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) +@@ -1021,6 +1085,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: @@ -6464,7 +8116,23 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: -@@ -1115,6 +1160,7 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 +@@ -1041,6 +1106,7 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: 
+ case HLSL_CLASS_VOID: ++ case HLSL_CLASS_STREAM_OUTPUT: + break; + } + +@@ -1112,9 +1178,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + ++ case HLSL_CLASS_STREAM_OUTPUT: ++ if (t1->e.so.so_type != t2->e.so.so_type) ++ return false; ++ return hlsl_types_are_equal(t1->e.so.type, t2->e.so.type); ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -6472,7 +8140,7 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_RASTERIZER_STATE: -@@ -1575,7 +1621,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp +@@ -1575,7 +1647,6 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; @@ -6480,7 +8148,7 @@ index bd5baacd83d..6ad0117fd5c 100644 return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } -@@ -1589,6 +1634,16 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex +@@ -1589,6 +1660,16 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -6497,7 +8165,30 @@ index bd5baacd83d..6ad0117fd5c 100644 struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { -@@ -1792,6 +1847,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned +@@ -1640,6 +1721,22 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * + return &s->node; + } + ++struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, ++ struct 
hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_vsir_instruction_ref *vsir_instr; ++ ++ if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) ++ return NULL; ++ init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); ++ vsir_instr->vsir_instr_idx = vsir_instr_idx; ++ ++ if (reg) ++ vsir_instr->node.reg = *reg; ++ ++ return &vsir_instr->node; ++} ++ + struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) + { +@@ -1792,6 +1889,118 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned return &swizzle->node; } @@ -6616,7 +8307,7 @@ index bd5baacd83d..6ad0117fd5c 100644 struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc) { -@@ -2142,6 +2309,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr +@@ -2142,6 +2351,51 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr return dst; } @@ -6668,7 +8359,7 @@ index bd5baacd83d..6ad0117fd5c 100644 static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_stateblock_constant *constant) { -@@ -2149,8 +2361,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, +@@ -2149,8 +2403,8 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, } struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, @@ -6679,7 +8370,7 @@ index bd5baacd83d..6ad0117fd5c 100644 { struct hlsl_state_block_entry *entry; struct clone_instr_map map = { 0 }; -@@ -2166,7 +2378,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, +@@ -2166,7 +2420,11 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, return NULL; } @@ -6692,7 
+8383,7 @@ index bd5baacd83d..6ad0117fd5c 100644 if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) { hlsl_free_state_block_entry(entry); -@@ -2179,7 +2395,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, +@@ -2179,7 +2437,16 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, hlsl_free_state_block_entry(entry); return NULL; } @@ -6710,7 +8401,7 @@ index bd5baacd83d..6ad0117fd5c 100644 vkd3d_free(map.instrs); return entry; -@@ -2284,6 +2509,12 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, +@@ -2284,8 +2551,17 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_SWIZZLE: return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr)); @@ -6722,8 +8413,13 @@ index bd5baacd83d..6ad0117fd5c 100644 + case HLSL_IR_STATEBLOCK_CONSTANT: return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); ++ ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ vkd3d_unreachable(); } -@@ -2314,6 +2545,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, + + vkd3d_unreachable(); +@@ -2314,6 +2590,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, decl->return_type = return_type; decl->parameters = *parameters; decl->loc = *loc; @@ -6731,18 +8427,57 @@ index bd5baacd83d..6ad0117fd5c 100644 if (!hlsl_types_are_equal(return_type, ctx->builtin_types.Void)) { -@@ -2570,6 +2802,10 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru +@@ -2523,6 +2800,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + return string; + + case HLSL_CLASS_TEXTURE: ++ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); ++ return string; ++ } ++ + if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + { + vkd3d_string_buffer_printf(string, "Texture"); +@@ -2548,6 +2831,11 @@ struct vkd3d_string_buffer 
*hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + return string; + + case HLSL_CLASS_UAV: ++ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); ++ return string; ++ } + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + vkd3d_string_buffer_printf(string, "RWBuffer"); + else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +@@ -2570,6 +2858,24 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } return string; + case HLSL_CLASS_ERROR: + vkd3d_string_buffer_printf(string, ""); + return string; ++ ++ case HLSL_CLASS_STREAM_OUTPUT: ++ if (type->e.so.so_type == HLSL_STREAM_OUTPUT_POINT_STREAM) ++ vkd3d_string_buffer_printf(string, "PointStream"); ++ else if (type->e.so.so_type == HLSL_STREAM_OUTPUT_LINE_STREAM) ++ vkd3d_string_buffer_printf(string, "LineStream"); ++ else ++ vkd3d_string_buffer_printf(string, "TriangleStream"); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.so.type))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } ++ return string; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: -@@ -2698,6 +2934,9 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) +@@ -2698,7 +3004,11 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) [HLSL_IR_STORE ] = "HLSL_IR_STORE", [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", @@ -6750,9 +8485,11 @@ index bd5baacd83d..6ad0117fd5c 100644 + [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", + [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", ++ [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", }; -@@ -2907,6 +3146,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + if (type >= ARRAY_SIZE(names)) +@@ -2907,6 +3217,7 @@ const 
char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) { static const char *const op_names[] = { @@ -6760,7 +8497,15 @@ index bd5baacd83d..6ad0117fd5c 100644 [HLSL_OP0_VOID] = "void", [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", -@@ -3146,6 +3386,40 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ +@@ -2924,6 +3235,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_DSY_FINE] = "dsy_fine", + [HLSL_OP1_EXP2] = "exp2", + [HLSL_OP1_F16TOF32] = "f16tof32", ++ [HLSL_OP1_F32TOF16] = "f32tof16", + [HLSL_OP1_FLOOR] = "floor", + [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_LOG2] = "log2", +@@ -3146,6 +3458,40 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_ vkd3d_string_buffer_printf(buffer, "]"); } @@ -6801,7 +8546,7 @@ index bd5baacd83d..6ad0117fd5c 100644 static void dump_ir_stateblock_constant(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_stateblock_constant *constant) { -@@ -3245,6 +3519,14 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, +@@ -3245,9 +3591,22 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr)); break; @@ -6816,7 +8561,15 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_IR_STATEBLOCK_CONSTANT: dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); break; -@@ -3308,8 +3590,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) ++ ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", ++ hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); ++ break; + } + } + +@@ -3308,8 +3667,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { struct hlsl_src *src, *next; @@ -6827,7 +8580,7 @@ index bd5baacd83d..6ad0117fd5c 100644 LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) { -@@ -3459,6 
+3741,24 @@ static void free_ir_index(struct hlsl_ir_index *index) +@@ -3459,6 +3818,24 @@ static void free_ir_index(struct hlsl_ir_index *index) vkd3d_free(index); } @@ -6852,7 +8605,7 @@ index bd5baacd83d..6ad0117fd5c 100644 static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *constant) { vkd3d_free(constant->name); -@@ -3527,6 +3827,14 @@ void hlsl_free_instr(struct hlsl_ir_node *node) +@@ -3527,9 +3904,21 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_switch(hlsl_ir_switch(node)); break; @@ -6867,7 +8620,14 @@ index bd5baacd83d..6ad0117fd5c 100644 case HLSL_IR_STATEBLOCK_CONSTANT: free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); break; -@@ -3801,12 +4109,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) ++ ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); ++ break; + } + } + +@@ -3801,12 +4190,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) static const char * const names[] = { @@ -6886,7 +8646,7 @@ index bd5baacd83d..6ad0117fd5c 100644 }; static const char *const variants_float[] = {"min10float", "min16float"}; -@@ -3957,6 +4265,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) +@@ -3957,6 +4346,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); @@ -6894,7 +8654,7 @@ index bd5baacd83d..6ad0117fd5c 100644 hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); -@@ -4059,6 +4368,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const 
struct vkd3d_shader_compil +@@ -4059,6 +4449,7 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; @@ -6902,7 +8662,7 @@ index bd5baacd83d..6ad0117fd5c 100644 break; case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: -@@ -4078,6 +4388,15 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil +@@ -4078,6 +4469,15 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil } } @@ -6918,7 +8678,7 @@ index bd5baacd83d..6ad0117fd5c 100644 return true; } -@@ -4089,8 +4408,6 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) +@@ -4089,8 +4489,6 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) struct hlsl_type *type, *next_type; unsigned int i; @@ -6927,17 +8687,17 @@ index bd5baacd83d..6ad0117fd5c 100644 for (i = 0; i < ctx->source_files_count; ++i) vkd3d_free((void *)ctx->source_files[i]); vkd3d_free(ctx->source_files); -@@ -4098,6 +4415,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - - rb_destroy(&ctx->functions, free_function_rb, NULL); +@@ -4113,6 +4511,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) + } + } + hlsl_block_cleanup(&ctx->static_initializers); + - /* State blocks must be free before the variables, because they contain instructions that may - * refer to them. 
*/ LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) + { + LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 22e25b23988..efe3aec024b 100644 +index 22e25b23988..4824234ab99 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -22,7 +22,6 @@ @@ -6963,15 +8723,51 @@ index 22e25b23988..efe3aec024b 100644 enum hlsl_type_class { HLSL_CLASS_SCALAR, -@@ -99,6 +106,7 @@ enum hlsl_type_class +@@ -97,8 +104,10 @@ enum hlsl_type_class + HLSL_CLASS_GEOMETRY_SHADER, + HLSL_CLASS_CONSTANT_BUFFER, HLSL_CLASS_BLEND_STATE, ++ HLSL_CLASS_STREAM_OUTPUT, HLSL_CLASS_VOID, HLSL_CLASS_NULL, + HLSL_CLASS_ERROR, }; enum hlsl_base_type -@@ -316,6 +324,9 @@ enum hlsl_ir_node_type +@@ -128,10 +137,18 @@ enum hlsl_sampler_dim + HLSL_SAMPLER_DIM_CUBEARRAY, + HLSL_SAMPLER_DIM_BUFFER, + HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, +- HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, ++ HLSL_SAMPLER_DIM_RAW_BUFFER, ++ HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_RAW_BUFFER, + /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ + }; + ++enum hlsl_so_object_type ++{ ++ HLSL_STREAM_OUTPUT_POINT_STREAM, ++ HLSL_STREAM_OUTPUT_LINE_STREAM, ++ HLSL_STREAM_OUTPUT_TRIANGLE_STREAM, ++}; ++ + enum hlsl_regset + { + HLSL_REGSET_SAMPLERS, +@@ -210,6 +227,12 @@ struct hlsl_type + } resource; + /* Additional field to distinguish object types. Currently used only for technique types. */ + unsigned int version; ++ /* Additional information if type is HLSL_CLASS_STREAM_OUTPUT. */ ++ struct ++ { ++ struct hlsl_type *type; ++ enum hlsl_so_object_type so_type; ++ } so; + } e; + + /* Number of numeric register components used by one value of this type, for each regset. 
+@@ -316,7 +339,12 @@ enum hlsl_ir_node_type HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, @@ -6979,9 +8775,12 @@ index 22e25b23988..efe3aec024b 100644 + HLSL_IR_COMPILE, + HLSL_IR_SAMPLER_STATE, HLSL_IR_STATEBLOCK_CONSTANT, ++ ++ HLSL_IR_VSIR_INSTRUCTION_REF, }; -@@ -352,6 +363,9 @@ struct hlsl_block + /* Common data for every type of IR instruction node. */ +@@ -352,6 +380,9 @@ struct hlsl_block { /* List containing instruction nodes; linked by the hlsl_ir_node.entry fields. */ struct list instrs; @@ -6991,7 +8790,21 @@ index 22e25b23988..efe3aec024b 100644 }; /* A reference to an instruction node (struct hlsl_ir_node), usable as a field in other structs. -@@ -474,6 +488,8 @@ struct hlsl_ir_var +@@ -396,10 +427,12 @@ struct hlsl_attribute + #define HLSL_MODIFIER_SINGLE 0x00020000 + #define HLSL_MODIFIER_EXPORT 0x00040000 + #define HLSL_STORAGE_ANNOTATION 0x00080000 ++#define HLSL_MODIFIER_UNORM 0x00100000 ++#define HLSL_MODIFIER_SNORM 0x00200000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +- HLSL_MODIFIER_COLUMN_MAJOR) ++ HLSL_MODIFIER_COLUMN_MAJOR | HLSL_MODIFIER_UNORM | HLSL_MODIFIER_SNORM) + + #define HLSL_INTERPOLATION_MODIFIERS_MASK (HLSL_STORAGE_NOINTERPOLATION | HLSL_STORAGE_CENTROID | \ + HLSL_STORAGE_NOPERSPECTIVE | HLSL_STORAGE_LINEAR) +@@ -474,6 +507,8 @@ struct hlsl_ir_var * range). The IR instructions are numerated starting from 2, because 0 means unused, and 1 * means function entry. */ unsigned int first_write, last_read; @@ -7000,7 +8813,17 @@ index 22e25b23988..efe3aec024b 100644 /* Offset where the variable's value is stored within its buffer in numeric register components. * This in case the variable is uniform. */ unsigned int buffer_offset; -@@ -591,10 +607,18 @@ struct hlsl_ir_function_decl +@@ -498,6 +533,9 @@ struct hlsl_ir_var + + /* Whether the shader performs dereferences with non-constant offsets in the variable. 
*/ + bool indexable; ++ /* Whether this is a semantic variable that was split from an array, or is the first ++ * element of a struct, and thus needs to be aligned when packed in the signature. */ ++ bool force_align; + + uint32_t is_input_semantic : 1; + uint32_t is_output_semantic : 1; +@@ -591,10 +629,18 @@ struct hlsl_ir_function_decl unsigned int attr_count; const struct hlsl_attribute *const *attrs; @@ -7019,7 +8842,7 @@ index 22e25b23988..efe3aec024b 100644 }; struct hlsl_ir_call -@@ -646,6 +670,7 @@ struct hlsl_ir_switch +@@ -646,6 +692,7 @@ struct hlsl_ir_switch enum hlsl_ir_expr_op { @@ -7027,7 +8850,15 @@ index 22e25b23988..efe3aec024b 100644 HLSL_OP0_VOID, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, -@@ -703,7 +728,7 @@ enum hlsl_ir_expr_op +@@ -663,6 +710,7 @@ enum hlsl_ir_expr_op + HLSL_OP1_DSY_FINE, + HLSL_OP1_EXP2, + HLSL_OP1_F16TOF32, ++ HLSL_OP1_F32TOF16, + HLSL_OP1_FLOOR, + HLSL_OP1_FRACT, + HLSL_OP1_LOG2, +@@ -703,7 +751,7 @@ enum hlsl_ir_expr_op HLSL_OP2_SLT, /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, @@ -7036,7 +8867,7 @@ index 22e25b23988..efe3aec024b 100644 HLSL_OP3_DP2ADD, /* TERNARY(a, b, c) returns 'b' if 'a' is true and 'c' otherwise. 'a' must always be boolean. * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ -@@ -854,6 +879,43 @@ struct hlsl_ir_string_constant +@@ -854,6 +902,43 @@ struct hlsl_ir_string_constant char *string; }; @@ -7080,7 +8911,24 @@ index 22e25b23988..efe3aec024b 100644 /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ struct hlsl_ir_stateblock_constant -@@ -965,10 +1027,11 @@ struct hlsl_ctx +@@ -862,6 +947,16 @@ struct hlsl_ir_stateblock_constant + char *name; + }; + ++/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. 
++ * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ ++struct hlsl_ir_vsir_instruction_ref ++{ ++ struct hlsl_ir_node node; ++ ++ /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ ++ unsigned int vsir_instr_idx; ++}; ++ + struct hlsl_scope + { + /* Item entry for hlsl_ctx.scopes. */ +@@ -965,10 +1060,11 @@ struct hlsl_ctx struct hlsl_scope *dummy_scope; /* List of all the scopes in the program; linked by the hlsl_scope.entry fields. */ struct list scopes; @@ -7096,7 +8944,7 @@ index 22e25b23988..efe3aec024b 100644 struct list extern_vars; /* List containing both the built-in HLSL buffers ($Globals and $Params) and the ones declared -@@ -1003,8 +1066,12 @@ struct hlsl_ctx +@@ -1003,8 +1099,12 @@ struct hlsl_ctx struct hlsl_type *string; struct hlsl_type *Void; struct hlsl_type *null; @@ -7109,7 +8957,7 @@ index 22e25b23988..efe3aec024b 100644 /* List of the instruction nodes for initializing static variables. */ struct hlsl_block static_initializers; -@@ -1016,19 +1083,23 @@ struct hlsl_ctx +@@ -1016,19 +1116,23 @@ struct hlsl_ctx { uint32_t index; struct hlsl_vec4 value; @@ -7136,7 +8984,7 @@ index 22e25b23988..efe3aec024b 100644 /* In some cases we generate opcodes by parsing an HLSL function and then * invoking it. 
If not NULL, this field is the name of the function that we * are currently parsing, "mangled" with an internal prefix to avoid -@@ -1044,6 +1115,7 @@ struct hlsl_ctx +@@ -1044,6 +1148,7 @@ struct hlsl_ctx bool child_effect; bool include_empty_buffers; bool warn_implicit_truncation; @@ -7144,7 +8992,7 @@ index 22e25b23988..efe3aec024b 100644 }; static inline bool hlsl_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) -@@ -1149,6 +1221,18 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n +@@ -1149,25 +1254,46 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } @@ -7163,7 +9011,15 @@ index 22e25b23988..efe3aec024b 100644 static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) { VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); -@@ -1158,16 +1242,19 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co + return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); + } + ++static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) ++{ ++ VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); ++ return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); ++} ++ static inline void hlsl_block_init(struct hlsl_block *block) { list_init(&block->instrs); @@ -7183,7 +9039,15 @@ index 22e25b23988..efe3aec024b 100644 } static inline void hlsl_src_from_node(struct hlsl_src *src, struct hlsl_ir_node *node) -@@ -1330,13 +1417,19 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const +@@ -1283,6 +1409,7 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) + { + case HLSL_SAMPLER_DIM_1D: + case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + 
return 1; + case HLSL_SAMPLER_DIM_1DARRAY: +@@ -1330,12 +1457,15 @@ bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); @@ -7199,13 +9063,18 @@ index 22e25b23988..efe3aec024b 100644 +void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); -+uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -+void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); -+void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); -@@ -1428,6 +1521,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); +@@ -1402,6 +1532,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond + struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); ++struct hlsl_type *hlsl_new_stream_output_type(struct hlsl_ctx *ctx, ++ enum hlsl_so_object_type so_type, struct hlsl_type *type); + struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); + +@@ -1428,6 +1560,9 @@ bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index); bool hlsl_index_is_resource_access(struct hlsl_ir_index *index); bool 
hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); @@ -7215,7 +9084,7 @@ index 22e25b23988..efe3aec024b 100644 struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -@@ -1440,6 +1536,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, +@@ -1440,6 +1575,8 @@ struct hlsl_type *hlsl_new_struct_type(struct hlsl_ctx *ctx, const char *name, struct hlsl_struct_field *fields, size_t field_count); struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned int components, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); @@ -7224,7 +9093,17 @@ index 22e25b23988..efe3aec024b 100644 struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -@@ -1493,6 +1591,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); +@@ -1466,6 +1603,9 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned + struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, + struct list *cases, const struct vkd3d_shader_location *loc); + ++struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, ++ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); ++ + void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, + enum vkd3d_shader_error error, const char *fmt, ...) 
VKD3D_PRINTF_FUNC(4, 5); + void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, +@@ -1493,6 +1633,7 @@ unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); @@ -7232,7 +9111,7 @@ index 22e25b23988..efe3aec024b 100644 unsigned int hlsl_type_get_sm4_offset(const struct hlsl_type *type, unsigned int offset); bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2); -@@ -1525,23 +1624,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, +@@ -1525,22 +1666,18 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); @@ -7255,15 +9134,23 @@ index 22e25b23988..efe3aec024b 100644 -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_shader_register_type *type, bool *has_idx); -int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); -- ++enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, ++ unsigned int storage_modifiers); + struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 0c02b27817e..97d8b13772b 100644 +index 0c02b27817e..31fb30521e9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -80,7 +80,9 @@ centroid {return KW_CENTROID; } +@@ -74,13 +74,16 @@ ANY (.) 
+ BlendState {return KW_BLENDSTATE; } + break {return KW_BREAK; } + Buffer {return KW_BUFFER; } ++ByteAddressBuffer {return KW_BYTEADDRESSBUFFER; } + case {return KW_CASE; } + cbuffer {return KW_CBUFFER; } + centroid {return KW_CENTROID; } column_major {return KW_COLUMN_MAJOR; } ComputeShader {return KW_COMPUTESHADER; } compile {return KW_COMPILE; } @@ -7273,7 +9160,7 @@ index 0c02b27817e..97d8b13772b 100644 continue {return KW_CONTINUE; } DepthStencilState {return KW_DEPTHSTENCILSTATE; } DepthStencilView {return KW_DEPTHSTENCILVIEW; } -@@ -88,7 +90,6 @@ default {return KW_DEFAULT; } +@@ -88,7 +91,6 @@ default {return KW_DEFAULT; } discard {return KW_DISCARD; } DomainShader {return KW_DOMAINSHADER; } do {return KW_DO; } @@ -7281,8 +9168,53 @@ index 0c02b27817e..97d8b13772b 100644 else {return KW_ELSE; } export {return KW_EXPORT; } extern {return KW_EXTERN; } +@@ -102,6 +104,7 @@ if {return KW_IF; } + in {return KW_IN; } + inline {return KW_INLINE; } + inout {return KW_INOUT; } ++LineStream {return KW_LINESTREAM; } + linear {return KW_LINEAR; } + matrix {return KW_MATRIX; } + namespace {return KW_NAMESPACE; } +@@ -112,6 +115,7 @@ out {return KW_OUT; } + packoffset {return KW_PACKOFFSET; } + pass {return KW_PASS; } + PixelShader {return KW_PIXELSHADER; } ++PointStream {return KW_POINTSTREAM; } + pixelshader {return KW_PIXELSHADER; } + RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } + RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } +@@ -126,6 +130,7 @@ RenderTargetView {return KW_RENDERTARGETVIEW; } + return {return KW_RETURN; } + row_major {return KW_ROW_MAJOR; } + RWBuffer {return KW_RWBUFFER; } ++RWByteAddressBuffer {return KW_RWBYTEADDRESSBUFFER; } + RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } + RWTexture1D {return KW_RWTEXTURE1D; } + RWTexture1DArray {return KW_RWTEXTURE1DARRAY; } +@@ -141,6 +146,7 @@ samplerCUBE {return KW_SAMPLERCUBE; } + SamplerState {return KW_SAMPLER; } + sampler_state {return 
KW_SAMPLER_STATE; } + shared {return KW_SHARED; } ++snorm {return KW_SNORM; } + stateblock {return KW_STATEBLOCK; } + stateblock_state {return KW_STATEBLOCK_STATE; } + static {return KW_STATIC; } +@@ -166,10 +172,12 @@ texture3D {return KW_TEXTURE3D; } + TextureCube {return KW_TEXTURECUBE; } + textureCUBE {return KW_TEXTURECUBE; } + TextureCubeArray {return KW_TEXTURECUBEARRAY; } ++TriangleStream {return KW_TRIANGLESTREAM; } + true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } + unsigned {return KW_UNSIGNED; } + uniform {return KW_UNIFORM; } ++unorm {return KW_UNORM; } + vector {return KW_VECTOR; } + VertexShader {return KW_VERTEXSHADER; } + vertexshader {return KW_VERTEXSHADER; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 3f319dea0d8..213cec79c3d 100644 +index 3f319dea0d8..03a2f38e4e9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -40,6 +40,7 @@ struct parse_initializer @@ -7293,6 +9225,15 @@ index 3f319dea0d8..213cec79c3d 100644 }; struct parse_parameter +@@ -52,7 +53,7 @@ struct parse_parameter + struct parse_initializer initializer; + }; + +-struct parse_colon_attribute ++struct parse_colon_attributes + { + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; @@ -147,7 +148,7 @@ static void yyerror(YYLTYPE *loc, void *scanner, struct hlsl_ctx *ctx, const cha static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) @@ -7302,7 +9243,17 @@ index 3f319dea0d8..213cec79c3d 100644 } static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) -@@ -437,6 +438,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct +@@ -331,6 +332,9 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node + { + const struct hlsl_type *type = cond->data_type; + ++ if (type->class == HLSL_CLASS_ERROR) ++ return; ++ + if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy 
> 1) + { + struct vkd3d_string_buffer *string; +@@ -437,6 +441,9 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct if (hlsl_types_are_equal(src_type, dst_type)) return node; @@ -7312,7 +9263,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!implicit_compatible_data_types(ctx, src_type, dst_type)) { struct vkd3d_string_buffer *src_string, *dst_string; -@@ -458,6 +462,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct +@@ -458,6 +465,40 @@ static struct hlsl_ir_node *add_implicit_conversion(struct hlsl_ctx *ctx, struct return add_cast(ctx, block, node, dst_type, loc); } @@ -7353,7 +9304,7 @@ index 3f319dea0d8..213cec79c3d 100644 static uint32_t add_modifiers(struct hlsl_ctx *ctx, uint32_t modifiers, uint32_t mod, const struct vkd3d_shader_location *loc) { -@@ -489,9 +527,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co +@@ -489,9 +530,10 @@ static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *co check_condition_type(ctx, condition); bool_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); @@ -7366,7 +9317,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, cast, &condition->loc))) return false; -@@ -516,7 +555,7 @@ enum loop_type +@@ -516,7 +558,7 @@ enum loop_type LOOP_DO_WHILE }; @@ -7375,7 +9326,7 @@ index 3f319dea0d8..213cec79c3d 100644 { unsigned int i, j; -@@ -525,11 +564,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att +@@ -525,11 +567,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att for (j = i + 1; j < attrs->count; ++j) { if (!strcmp(attrs->attrs[i]->name, attrs->attrs[j]->name)) @@ -7389,7 +9340,14 @@ index 3f319dea0d8..213cec79c3d 100644 } static void resolve_loop_continue(struct hlsl_ctx *ctx, struct hlsl_block *block, enum loop_type type, -@@ -610,8 +648,10 @@ static struct hlsl_default_value 
evaluate_static_expression(struct hlsl_ctx *ctx +@@ -606,12 +647,17 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + struct hlsl_block expr; + struct hlsl_src src; + ++ if (node_from_block(block)->data_type->class == HLSL_CLASS_ERROR) ++ return ret; ++ + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) { switch (node->type) { @@ -7400,7 +9358,16 @@ index 3f319dea0d8..213cec79c3d 100644 case HLSL_IR_STRING_CONSTANT: case HLSL_IR_SWIZZLE: case HLSL_IR_LOAD: -@@ -639,14 +679,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx +@@ -632,6 +678,8 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + break; ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ vkd3d_unreachable(); + } + } + +@@ -639,14 +687,15 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx return ret; hlsl_block_add_block(&expr, block); @@ -7418,7 +9385,7 @@ index 3f319dea0d8..213cec79c3d 100644 hlsl_run_const_passes(ctx, &expr); node = src.node; hlsl_src_remove(&src); -@@ -697,9 +738,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -697,9 +746,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; @@ -7429,7 +9396,17 @@ index 3f319dea0d8..213cec79c3d 100644 check_loop_attributes(ctx, attributes, loc); /* Ignore unroll(0) attribute, and any invalid attribute. 
*/ -@@ -974,6 +1013,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str +@@ -897,6 +944,9 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, + { + struct hlsl_ir_node *store; + ++ if (return_value->data_type->class == HLSL_CLASS_ERROR) ++ return true; ++ + if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) + return false; + +@@ -974,6 +1024,12 @@ static bool add_array_access(struct hlsl_ctx *ctx, struct hlsl_block *block, str const struct hlsl_type *expr_type = array->data_type, *index_type = index->data_type; struct hlsl_ir_node *return_index, *cast; @@ -7442,7 +9419,7 @@ index 3f319dea0d8..213cec79c3d 100644 if ((expr_type->class == HLSL_CLASS_TEXTURE || expr_type->class == HLSL_CLASS_UAV) && expr_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC) { -@@ -1164,6 +1209,32 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, +@@ -1164,6 +1220,33 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, return true; } @@ -7469,13 +9446,14 @@ index 3f319dea0d8..213cec79c3d 100644 + } + + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); -+ return false; ++ block->value = ctx->error_instr; ++ return true; +} + static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) { struct parse_variable_def *v, *v_next; -@@ -1227,7 +1298,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, +@@ -1227,7 +1310,8 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, } static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, @@ -7485,7 +9463,7 @@ index 3f319dea0d8..213cec79c3d 100644 static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, struct parse_parameter *param, const struct vkd3d_shader_location *loc) -@@ -1285,7 
+1357,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters +@@ -1285,7 +1369,8 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters for (i = 0; i < param->initializer.args_count; ++i) { @@ -7495,7 +9473,7 @@ index 3f319dea0d8..213cec79c3d 100644 } free_parse_initializer(¶m->initializer); -@@ -1673,25 +1746,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl +@@ -1673,25 +1758,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl return expr; } @@ -7544,7 +9522,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -1699,12 +1783,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru +@@ -1699,12 +1795,18 @@ static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, stru { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; @@ -7563,7 +9541,7 @@ index 3f319dea0d8..213cec79c3d 100644 check_integer_type(ctx, arg); return add_unary_arithmetic_expr(ctx, block, op, arg, loc); -@@ -1716,6 +1806,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct +@@ -1716,6 +1818,9 @@ static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *bool_type; @@ -7573,7 +9551,7 @@ index 3f319dea0d8..213cec79c3d 100644 bool_type = hlsl_get_numeric_type(ctx, arg->data_type->class, HLSL_TYPE_BOOL, arg->data_type->dimx, arg->data_type->dimy); -@@ -1745,7 +1838,11 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str +@@ -1745,7 +1850,11 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; struct hlsl_type *common_type; @@ -7586,7 +9564,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(args[0] 
= add_implicit_conversion(ctx, block, arg1, common_type, loc))) return NULL; -@@ -1942,6 +2039,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls +@@ -1942,6 +2051,12 @@ static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hls hlsl_block_add_block(block1, block2); destroy_block(block2); @@ -7599,7 +9577,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) return NULL; -@@ -2048,18 +2151,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un +@@ -2048,18 +2163,23 @@ static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, un return true; } @@ -7626,7 +9604,7 @@ index 3f319dea0d8..213cec79c3d 100644 assign_op = ASSIGN_OP_ADD; } if (assign_op != ASSIGN_OP_ASSIGN) -@@ -2068,7 +2176,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2068,7 +2188,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo VKD3D_ASSERT(op); if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) @@ -7635,7 +9613,7 @@ index 3f319dea0d8..213cec79c3d 100644 } if (hlsl_is_numeric_type(lhs_type)) -@@ -2078,14 +2186,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2078,14 +2198,14 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo } if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) @@ -7652,7 +9630,7 @@ index 3f319dea0d8..213cec79c3d 100644 } else if (lhs->type == HLSL_IR_SWIZZLE) { -@@ -2100,25 +2208,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2100,25 +2220,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) { hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); @@ -7682,7 
+9660,7 @@ index 3f319dea0d8..213cec79c3d 100644 hlsl_block_add_instr(block, new_swizzle); lhs = swizzle->val.node; -@@ -2127,7 +2233,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2127,7 +2245,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo else { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_LVALUE, "Invalid lvalue."); @@ -7691,7 +9669,7 @@ index 3f319dea0d8..213cec79c3d 100644 } } -@@ -2142,11 +2248,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2142,11 +2260,11 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs))) { hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource store."); @@ -7705,7 +9683,7 @@ index 3f319dea0d8..213cec79c3d 100644 resource_type = hlsl_deref_get_type(ctx, &resource_deref); VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); -@@ -2168,7 +2274,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2168,7 +2286,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) { hlsl_cleanup_deref(&resource_deref); @@ -7714,7 +9692,7 @@ index 3f319dea0d8..213cec79c3d 100644 } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); -@@ -2195,13 +2301,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2195,13 +2313,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) { hlsl_cleanup_deref(&deref); @@ -7730,7 +9708,7 @@ index 3f319dea0d8..213cec79c3d 100644 } hlsl_block_add_block(block, &store_block); } -@@ -2226,23 +2332,23 @@ static struct hlsl_ir_node 
*add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2226,23 +2344,23 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo continue; if (!(c = hlsl_new_uint_constant(ctx, i, &lhs->loc))) @@ -7759,7 +9737,7 @@ index 3f319dea0d8..213cec79c3d 100644 } hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&deref); -@@ -2254,24 +2360,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo +@@ -2254,24 +2372,19 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo struct hlsl_deref deref; if (!hlsl_init_deref_from_index_chain(ctx, &deref, lhs)) @@ -7788,7 +9766,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool decrement, bool post, -@@ -2280,6 +2381,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d +@@ -2280,6 +2393,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d struct hlsl_ir_node *lhs = node_from_block(block); struct hlsl_ir_node *one; @@ -7798,7 +9776,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (lhs->data_type->modifiers & HLSL_MODIFIER_CONST) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST, "Argument to %s%screment operator is const.", post ? "post" : "pre", decrement ? 
"de" : "in"); -@@ -2307,57 +2411,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d +@@ -2307,57 +2423,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d return true; } @@ -7858,7 +9836,7 @@ index 3f319dea0d8..213cec79c3d 100644 { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); struct hlsl_deref dst_deref; -@@ -2376,38 +2432,107 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i +@@ -2376,38 +2444,107 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); @@ -7867,11 +9845,11 @@ index 3f319dea0d8..213cec79c3d 100644 { struct hlsl_default_value default_value = {0}; - unsigned int dst_index; -- + - if (!hlsl_clone_block(ctx, &block, instrs)) - return; - default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - +- - if (dst->is_param) - dst_index = *store_index; + if (src->type == HLSL_IR_COMPILE || src->type == HLSL_IR_SAMPLER_STATE) @@ -7914,10 +9892,7 @@ index 3f319dea0d8..213cec79c3d 100644 + /* Sampler states end up in the variable's state_blocks instead of + * being used to initialize its value. 
*/ + struct hlsl_ir_sampler_state *sampler_state = hlsl_ir_sampler_state(src); - -- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -- return; -- hlsl_block_add_block(instrs, &block); ++ + if (dst_comp_type->class != HLSL_CLASS_SAMPLER) + { + struct vkd3d_string_buffer *dst_string; @@ -7942,7 +9917,10 @@ index 3f319dea0d8..213cec79c3d 100644 + { + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) + return; -+ + +- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) +- return; +- hlsl_block_add_block(instrs, &block); + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) + return; + hlsl_block_add_block(instrs, &block); @@ -7982,7 +9960,7 @@ index 3f319dea0d8..213cec79c3d 100644 static bool type_has_object_components(const struct hlsl_type *type) { if (type->class == HLSL_CLASS_ARRAY) -@@ -2733,13 +2858,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2733,13 +2870,15 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var if (v->initializer.args_count) { @@ -8000,7 +9978,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (is_default_values_initializer) { -@@ -2769,19 +2896,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2769,19 +2908,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->initializer.args[0] = node_from_block(v->initializer.instrs); } @@ -8021,7 +9999,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (is_default_values_initializer) { -@@ -2795,6 +2910,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2795,6 +2922,9 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var { hlsl_block_add_block(initializers, v->initializer.instrs); } @@ -8031,7 +10009,7 @@ index 3f319dea0d8..213cec79c3d 100644 } else if (var->storage_modifiers & 
HLSL_STORAGE_STATIC) { -@@ -2835,28 +2953,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var +@@ -2835,28 +2965,36 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var return initializers; } @@ -8041,11 +10019,11 @@ index 3f319dea0d8..213cec79c3d 100644 + bool is_compile, const struct parse_initializer *args) { - unsigned int i; -+ unsigned int i, k; - +- - if (decl->parameters.count < args->args_count) - return false; -- ++ unsigned int i, k; + - for (i = 0; i < args->args_count; ++i) + k = 0; + for (i = 0; i < decl->parameters.count; ++i) @@ -8080,7 +10058,7 @@ index 3f319dea0d8..213cec79c3d 100644 const struct vkd3d_shader_location *loc) { struct hlsl_ir_function_decl *decl, *compatible_match = NULL; -@@ -2869,7 +2995,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, +@@ -2869,7 +3007,7 @@ static struct hlsl_ir_function_decl *find_function_call(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry) { @@ -8089,7 +10067,7 @@ index 3f319dea0d8..213cec79c3d 100644 { if (compatible_match) { -@@ -2890,26 +3016,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc +@@ -2890,26 +3028,35 @@ static struct hlsl_ir_node *hlsl_new_void_expr(struct hlsl_ctx *ctx, const struc return hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc); } @@ -8132,7 +10110,7 @@ index 3f319dea0d8..213cec79c3d 100644 arg = cast; } -@@ -2918,13 +3053,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2918,13 +3065,15 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu struct hlsl_ir_node *store; if (!(store = hlsl_new_simple_store(ctx, param, arg))) @@ -8150,7 +10128,7 @@ index 3f319dea0d8..213cec79c3d 100644 { struct hlsl_ir_var *param = func->parameters.vars[i]; unsigned int comp_count = 
hlsl_type_component_count(param->data_type); -@@ -2932,6 +3069,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2932,6 +3081,9 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu VKD3D_ASSERT(param->default_values); @@ -8160,7 +10138,7 @@ index 3f319dea0d8..213cec79c3d 100644 hlsl_init_simple_deref_from_var(¶m_deref, param); for (j = 0; j < comp_count; ++j) -@@ -2945,20 +3085,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2945,20 +3097,23 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu { value.u[0] = param->default_values[j].number; if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) @@ -8187,7 +10165,7 @@ index 3f319dea0d8..213cec79c3d 100644 for (i = 0; i < args->args_count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; -@@ -2973,11 +3116,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2973,11 +3128,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu "Output argument to \"%s\" is const.", func->func->name); if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) @@ -8201,7 +10179,7 @@ index 3f319dea0d8..213cec79c3d 100644 } } -@@ -2998,7 +3141,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu +@@ -2998,7 +3153,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu hlsl_block_add_instr(args->instrs, expr); } @@ -8210,7 +10188,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, -@@ -3006,7 +3149,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, +@@ -3006,7 +3161,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, { struct hlsl_type *type = arg->data_type; @@ -8219,7 +10197,7 @@ index 3f319dea0d8..213cec79c3d 100644 
return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -@@ -3094,14 +3237,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, +@@ -3094,14 +3249,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx, static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -8236,7 +10214,7 @@ index 3f319dea0d8..213cec79c3d 100644 return convert_args(ctx, params, type, loc); } -@@ -3129,6 +3270,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, +@@ -3129,6 +3282,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode) { struct hlsl_ir_function_decl *func; @@ -8244,7 +10222,7 @@ index 3f319dea0d8..213cec79c3d 100644 struct hlsl_type *type; char *body; -@@ -3152,8 +3294,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, +@@ -3152,8 +3306,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, const char *fn_name = asin_mode ? 
fn_name_asin : fn_name_acos; @@ -8256,7 +10234,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, fn_name, type->name, -@@ -3165,7 +3308,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, +@@ -3165,7 +3320,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx, if (!func) return false; @@ -8265,7 +10243,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_acos(struct hlsl_ctx *ctx, -@@ -3282,9 +3425,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, +@@ -3282,9 +3437,9 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, " : poly_approx;\n" "}"; @@ -8277,7 +10255,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(buf = hlsl_get_string_buffer(ctx))) return false; -@@ -3314,7 +3457,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, +@@ -3314,7 +3469,7 @@ static bool write_atan_or_atan2(struct hlsl_ctx *ctx, if (!func) return false; @@ -8286,7 +10264,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_atan(struct hlsl_ctx *ctx, -@@ -3507,7 +3650,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, +@@ -3507,7 +3662,7 @@ static bool write_cosh_or_sinh(struct hlsl_ctx *ctx, if (!func) return false; @@ -8295,7 +10273,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_cosh(struct hlsl_ctx *ctx, -@@ -3525,9 +3668,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, +@@ -3525,9 +3680,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, struct hlsl_type *cast_type; enum hlsl_base_type base; @@ -8307,7 +10285,7 @@ index 3f319dea0d8..213cec79c3d 100644 base = HLSL_TYPE_FLOAT; cast_type = hlsl_get_vector_type(ctx, base, 3); -@@ -3698,15 +3840,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, +@@ -3698,15 +3852,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, return false; } @@ -8327,7 +10305,7 @@ index 3f319dea0d8..213cec79c3d 100644 template = templates[dim]; switch (dim) -@@ -3734,7 +3875,7 @@ static bool 
intrinsic_determinant(struct hlsl_ctx *ctx, +@@ -3734,7 +3887,7 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx, if (!func) return false; @@ -8336,26 +10314,10 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_distance(struct hlsl_ctx *ctx, -@@ -3751,19 +3892,63 @@ static bool intrinsic_distance(struct hlsl_ctx *ctx, - if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, arg2, loc))) - return false; +@@ -3766,6 +3919,50 @@ static bool intrinsic_dot(struct hlsl_ctx *ctx, + return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); + } -- if (!(add = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, arg1, neg, loc))) -+ if (!(add = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_ADD, arg1, neg, loc))) -+ return false; -+ -+ if (!(dot = add_binary_dot_expr(ctx, params->instrs, add, add, loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, dot, loc); -+} -+ -+static bool intrinsic_dot(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return !!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); -+} -+ +static bool intrinsic_dst(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc) +{ @@ -8391,26 +10353,19 @@ index 3f319dea0d8..213cec79c3d 100644 + vec4_type->name, type->name, type->name, + vec4_type->name, + vec4_type->name))) - return false; -- -- if (!(dot = add_binary_dot_expr(ctx, params->instrs, add, add, loc))) ++ return false; + func = hlsl_compile_internal_function(ctx, "dst", body); + vkd3d_free(body); + if (!func) - return false; - -- return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SQRT, dot, loc); --} -- --static bool intrinsic_dot(struct hlsl_ctx *ctx, -- const struct parse_initializer *params, const struct vkd3d_shader_location *loc) --{ -- return 
!!add_binary_dot_expr(ctx, params->instrs, params->args[0], params->args[1], loc); ++ return false; ++ + return !!add_user_call(ctx, func, params, false, loc); - } - ++} ++ static bool intrinsic_exp(struct hlsl_ctx *ctx, -@@ -3809,9 +3994,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3809,9 +4006,9 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, " return dot(i, ng) < 0 ? n : -n;\n" "}\n"; @@ -8422,7 +10377,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name))) -@@ -3821,7 +4006,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, +@@ -3821,7 +4018,7 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, if (!func) return false; @@ -8431,7 +10386,29 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, -@@ -3926,7 +4111,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, +@@ -3839,6 +4036,21 @@ static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, + return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); + } + ++static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *type; ++ ++ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) ++ return false; ++ ++ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); ++ ++ operands[0] = params->args[0]; ++ return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); ++} ++ + static bool intrinsic_floor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3926,7 +4138,7 @@ static bool intrinsic_fwidth(struct hlsl_ctx *ctx, if (!func) return false; @@ -8440,7 +10417,7 @@ index 
3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_ldexp(struct hlsl_ctx *ctx, -@@ -4029,7 +4214,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, +@@ -4029,7 +4241,7 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!(func = hlsl_compile_internal_function(ctx, "lit", body))) return false; @@ -8449,7 +10426,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_log(struct hlsl_ctx *ctx, -@@ -4081,6 +4266,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, +@@ -4081,6 +4293,20 @@ static bool intrinsic_log2(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_LOG2, arg, loc); } @@ -8470,7 +10447,7 @@ index 3f319dea0d8..213cec79c3d 100644 static bool intrinsic_max(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -4099,6 +4298,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx, +@@ -4099,6 +4325,35 @@ static bool intrinsic_min(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, params->args[0], params->args[1], loc); } @@ -8506,7 +10483,7 @@ index 3f319dea0d8..213cec79c3d 100644 static bool intrinsic_mul(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -4285,13 +4513,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, +@@ -4285,13 +4540,9 @@ static bool intrinsic_reflect(struct hlsl_ctx *ctx, static bool intrinsic_refract(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -8522,7 +10499,7 @@ index 3f319dea0d8..213cec79c3d 100644 char *body; static const char template[] = -@@ -4303,28 +4527,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, +@@ -4303,28 +4554,34 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, " return t >= 0.0 ? 
i.x * r - (i.x * d + sqrt(t)) * n : 0;\n" "}"; @@ -8571,7 +10548,7 @@ index 3f319dea0d8..213cec79c3d 100644 return false; func = hlsl_compile_internal_function(ctx, "refract", body); -@@ -4332,7 +4562,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, +@@ -4332,7 +4589,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, if (!func) return false; @@ -8580,7 +10557,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_round(struct hlsl_ctx *ctx, -@@ -4415,6 +4645,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, +@@ -4415,6 +4672,35 @@ static bool intrinsic_sin(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_SIN, arg, loc); } @@ -8616,7 +10593,7 @@ index 3f319dea0d8..213cec79c3d 100644 static bool intrinsic_sinh(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { -@@ -4436,9 +4695,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -4436,9 +4722,9 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, " return (p * p) * (3 - 2 * p);\n" "}"; @@ -8628,7 +10605,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(body = hlsl_sprintf_alloc(ctx, template, type->name, type->name, type->name, type->name, type->name))) return false; -@@ -4447,7 +4706,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, +@@ -4447,7 +4733,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!func) return false; @@ -8637,7 +10614,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_sqrt(struct hlsl_ctx *ctx, -@@ -4469,13 +4728,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, +@@ -4469,13 +4755,12 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; @@ -8652,7 +10629,7 @@ index 3f319dea0d8..213cec79c3d 100644 return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); } -@@ -4523,7 +4781,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, +@@ 
-4523,7 +4808,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, if (!func) return false; @@ -8661,7 +10638,7 @@ index 3f319dea0d8..213cec79c3d 100644 } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, -@@ -4661,17 +4919,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * +@@ -4661,17 +4946,17 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (!(var = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 2), loc))) return false; @@ -8682,7 +10659,18 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; -@@ -4937,6 +5195,7 @@ intrinsic_functions[] = +@@ -4890,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *expr; + ++ if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); ++ + if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, + operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) + return false; +@@ -4937,9 +5226,11 @@ intrinsic_functions[] = {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, @@ -8690,7 +10678,11 @@ index 3f319dea0d8..213cec79c3d 100644 {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, {"f16tof32", 1, true, intrinsic_f16tof32}, -@@ -4952,8 +5211,10 @@ intrinsic_functions[] = ++ {"f32tof16", 1, true, intrinsic_f32tof16}, + {"faceforward", 3, true, intrinsic_faceforward}, + {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, +@@ -4952,8 +5243,10 @@ intrinsic_functions[] = {"log", 1, true, intrinsic_log}, {"log10", 1, 
true, intrinsic_log10}, {"log2", 1, true, intrinsic_log2}, @@ -8701,7 +10693,7 @@ index 3f319dea0d8..213cec79c3d 100644 {"mul", 2, true, intrinsic_mul}, {"normalize", 1, true, intrinsic_normalize}, {"pow", 2, true, intrinsic_pow}, -@@ -4966,6 +5227,7 @@ intrinsic_functions[] = +@@ -4966,6 +5259,7 @@ intrinsic_functions[] = {"saturate", 1, true, intrinsic_saturate}, {"sign", 1, true, intrinsic_sign}, {"sin", 1, true, intrinsic_sin}, @@ -8709,7 +10701,7 @@ index 3f319dea0d8..213cec79c3d 100644 {"sinh", 1, true, intrinsic_sinh}, {"smoothstep", 3, true, intrinsic_smoothstep}, {"sqrt", 1, true, intrinsic_sqrt}, -@@ -5002,9 +5264,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, +@@ -5002,9 +5296,18 @@ static struct hlsl_block *add_call(struct hlsl_ctx *ctx, const char *name, struct intrinsic_function *intrinsic; struct hlsl_ir_function_decl *decl; @@ -8730,7 +10722,7 @@ index 3f319dea0d8..213cec79c3d 100644 goto fail; } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), -@@ -5060,18 +5331,94 @@ fail: +@@ -5060,18 +5363,94 @@ fail: return NULL; } @@ -8828,7 +10820,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; -@@ -5088,6 +5435,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5088,6 +5467,14 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_type *cond_type = cond->data_type; struct hlsl_type *common_type; @@ -8843,7 +10835,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (cond_type->class > HLSL_CLASS_LAST_NUMERIC) { struct vkd3d_string_buffer *string; -@@ -5113,11 +5468,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5113,11 +5500,6 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, } else { @@ -8855,7 +10847,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (common_type->dimx == 1 && common_type->dimy == 1) { common_type = 
hlsl_get_numeric_type(ctx, cond_type->class, -@@ -5139,6 +5489,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -5139,6 +5521,11 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_release_string_buffer(ctx, cond_string); hlsl_release_string_buffer(ctx, value_string); } @@ -8867,10 +10859,193 @@ index 3f319dea0d8..213cec79c3d 100644 } if (!(first = add_implicit_conversion(ctx, block, first, common_type, &first->loc))) -@@ -5854,6 +6209,21 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - const struct hlsl_type *object_type = object->data_type; - const struct method_function *method; +@@ -5196,6 +5583,7 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) + case HLSL_SAMPLER_DIM_CUBE: + case HLSL_SAMPLER_DIM_CUBEARRAY: + case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: + /* Offset parameters not supported for these types. */ + return 0; + default: +@@ -5215,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct + return false; + } ++static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; ++ struct hlsl_ir_node *load; ++ unsigned int value_dim; ++ ++ if (params->args_count != 1 && params->args_count != 2) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.", ++ params->args_count); ++ return false; ++ } ++ ++ if (params->args_count == 2) ++ { ++ hlsl_fixme(ctx, loc, "Tiled resource status argument."); ++ return false; ++ } ++ ++ if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR) ++ { ++ hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name); ++ return false; ++ } ++ ++ if (!strcmp(name, "Load")) ++ value_dim = 1; ++ else if (!strcmp(name, "Load2")) ++ value_dim = 2; ++ else if (!strcmp(name, "Load3")) ++ value_dim = 3; ++ else ++ value_dim = 4; ++ ++ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ return false; ++ ++ load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim); ++ load_params.resource = object; ++ ++ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) ++ return false; ++ hlsl_block_add_instr(block, load); ++ return true; ++} ++ + static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -5224,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_node *load; + bool multisampled; + ++ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ return add_raw_load_method_call(ctx, block, object, name, params, loc); ++ + if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { + hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name); +@@ -5813,32 +6253,88 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block + return true; + } + ++static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, ++ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *offset, *rhs, *store; ++ struct hlsl_deref resource_deref; ++ unsigned int value_dim; ++ ++ if (params->args_count != 2) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Wrong number of arguments to method '%s': expected 2.", 
name); ++ return false; ++ } ++ ++ if (!strcmp(name, "Store")) ++ value_dim = 1; ++ else if (!strcmp(name, "Store2")) ++ value_dim = 2; ++ else if (!strcmp(name, "Store3")) ++ value_dim = 3; ++ else ++ value_dim = 4; ++ ++ if (!(offset = add_implicit_conversion(ctx, block, params->args[0], ++ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ return false; ++ ++ if (!(rhs = add_implicit_conversion(ctx, block, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc))) ++ return false; ++ ++ if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object)) ++ return false; ++ ++ if (!(store = hlsl_new_resource_store(ctx, &resource_deref, offset, rhs, loc))) ++ { ++ hlsl_cleanup_deref(&resource_deref); ++ return false; ++ } ++ ++ hlsl_block_add_instr(block, store); ++ hlsl_cleanup_deref(&resource_deref); ++ ++ return true; ++} ++ + static const struct method_function + { + const char *name; + bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, + const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); +- bool valid_dims[HLSL_SAMPLER_DIM_MAX + 1]; ++ char valid_dims[HLSL_SAMPLER_DIM_MAX + 1]; + } +-object_methods[] = ++texture_methods[] = + { +- /* g c 1d 2d 3d cube 1darr 2darr 2dms 2dmsarr cubearr buff sbuff*/ +- { "Gather", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, +- { "GatherAlpha", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, +- { "GatherBlue", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, +- { "GatherGreen", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, +- { "GatherRed", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, ++ { "Gather", add_gather_method_call, "00010101001000" }, ++ { "GatherAlpha", add_gather_method_call, "00010101001000" }, ++ { "GatherBlue", add_gather_method_call, "00010101001000" }, ++ { "GatherGreen", add_gather_method_call, 
"00010101001000" }, ++ { "GatherRed", add_gather_method_call, "00010101001000" }, ++ ++ { "GetDimensions", add_getdimensions_method_call, "00111111111110" }, + +- { "GetDimensions", add_getdimensions_method_call, {0,0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, ++ { "Load", add_load_method_call, "00111011110111" }, ++ { "Load2", add_raw_load_method_call, "00000000000001" }, ++ { "Load3", add_raw_load_method_call, "00000000000001" }, ++ { "Load4", add_raw_load_method_call, "00000000000001" }, + +- { "Load", add_load_method_call, {0,0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1}}, ++ { "Sample", add_sample_method_call, "00111111001000" }, ++ { "SampleBias", add_sample_lod_method_call, "00111111001000" }, ++ { "SampleCmp", add_sample_cmp_method_call, "00111111001000" }, ++ { "SampleCmpLevelZero", add_sample_cmp_method_call, "00111111001000" }, ++ { "SampleGrad", add_sample_grad_method_call, "00111111001000" }, ++ { "SampleLevel", add_sample_lod_method_call, "00111111001000" }, ++}; + +- { "Sample", add_sample_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, +- { "SampleBias", add_sample_lod_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, +- { "SampleCmp", add_sample_cmp_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, +- { "SampleCmpLevelZero", add_sample_cmp_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, +- { "SampleGrad", add_sample_grad_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, +- { "SampleLevel", add_sample_lod_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, ++static const struct method_function uav_methods[] = ++{ ++ { "Store", add_store_method_call, "00000000000001" }, ++ { "Store2", add_store_method_call, "00000000000001" }, ++ { "Store3", add_store_method_call, "00000000000001" }, ++ { "Store4", add_store_method_call, "00000000000001" }, + }; + + static int object_method_function_name_compare(const void *a, const void *b) +@@ -5852,9 +6348,35 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru + const char 
*name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + const struct hlsl_type *object_type = object->data_type; +- const struct method_function *method; ++ const struct method_function *method, *methods; ++ unsigned int count; ++ + if (object_type->class == HLSL_CLASS_ERROR) + { + block->value = ctx->error_instr; @@ -8885,11 +11060,36 @@ index 3f319dea0d8..213cec79c3d 100644 + return true; + } + } -+ - if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) + +- if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) ++ if (object_type->class == HLSL_CLASS_TEXTURE) ++ { ++ count = ARRAY_SIZE(texture_methods); ++ methods = texture_methods; ++ } ++ else if (object_type->class == HLSL_CLASS_UAV) ++ { ++ count = ARRAY_SIZE(uav_methods); ++ methods = uav_methods; ++ } ++ else { struct vkd3d_string_buffer *string; -@@ -5995,16 +6365,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + +@@ -5865,10 +6387,10 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru + return false; + } + +- method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), sizeof(*method), ++ method = bsearch(name, methods, count, sizeof(*method), + object_method_function_name_compare); + +- if (method && method->valid_dims[object_type->sampler_dim]) ++ if (method && method->valid_dims[object_type->sampler_dim] == '1') + { + return method->handler(ctx, block, object, name, params, loc); + } +@@ -5995,16 +6517,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, hlsl_release_string_buffer(ctx, string); } @@ -8906,7 +11106,27 @@ index 3f319dea0d8..213cec79c3d 100644 } %locations -@@ -6058,8 +6418,10 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h +@@ -6037,10 +6549,11 @@ static bool state_block_add_entry(struct hlsl_state_block 
*state_block, struct h + struct parse_if_body if_body; + enum parse_assign_op assign_op; + struct hlsl_reg_reservation reg_reservation; +- struct parse_colon_attribute colon_attribute; ++ struct parse_colon_attributes colon_attributes; + struct hlsl_semantic semantic; + enum hlsl_buffer_type buffer_type; + enum hlsl_sampler_dim sampler_dim; ++ enum hlsl_so_object_type so_type; + struct hlsl_attribute *attr; + struct parse_attribute_list attr_list; + struct hlsl_ir_switch_case *switch_case; +@@ -6052,14 +6565,17 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_BLENDSTATE + %token KW_BREAK + %token KW_BUFFER ++%token KW_BYTEADDRESSBUFFER + %token KW_CASE + %token KW_CONSTANTBUFFER + %token KW_CBUFFER %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE @@ -8917,7 +11137,7 @@ index 3f319dea0d8..213cec79c3d 100644 %token KW_CONTINUE %token KW_DEFAULT %token KW_DEPTHSTENCILSTATE -@@ -6067,7 +6429,6 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h +@@ -6067,7 +6583,6 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_DISCARD %token KW_DO %token KW_DOMAINSHADER @@ -8925,7 +11145,69 @@ index 3f319dea0d8..213cec79c3d 100644 %token KW_ELSE %token KW_EXPORT %token KW_EXTERN -@@ -6273,6 +6634,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h +@@ -6082,6 +6597,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_INLINE + %token KW_INOUT + %token KW_LINEAR ++%token KW_LINESTREAM + %token KW_MATRIX + %token KW_NAMESPACE + %token KW_NOINTERPOLATION +@@ -6091,6 +6607,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_PACKOFFSET + %token KW_PASS + %token KW_PIXELSHADER ++%token KW_POINTSTREAM + %token KW_RASTERIZERORDEREDBUFFER + %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER + %token KW_RASTERIZERORDEREDTEXTURE1D +@@ -6104,6 
+6621,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_REGISTER + %token KW_ROW_MAJOR + %token KW_RWBUFFER ++%token KW_RWBYTEADDRESSBUFFER + %token KW_RWSTRUCTUREDBUFFER + %token KW_RWTEXTURE1D + %token KW_RWTEXTURE1DARRAY +@@ -6118,6 +6636,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_SAMPLER_STATE + %token KW_SAMPLERCOMPARISONSTATE + %token KW_SHARED ++%token KW_SNORM + %token KW_STATEBLOCK + %token KW_STATEBLOCK_STATE + %token KW_STATIC +@@ -6138,10 +6657,12 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_TEXTURE3D + %token KW_TEXTURECUBE + %token KW_TEXTURECUBEARRAY ++%token KW_TRIANGLESTREAM + %token KW_TRUE + %token KW_TYPEDEF + %token KW_UNSIGNED + %token KW_UNIFORM ++%token KW_UNORM + %token KW_VECTOR + %token KW_VERTEXSHADER + %token KW_VOID +@@ -6230,7 +6751,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + + %type buffer_type + +-%type colon_attribute ++%type colon_attributes + + %type field + %type fields_list +@@ -6267,12 +6788,15 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + + %type semantic + ++%type so_type ++ + %type state_block + + %type state_block_index_opt %type switch_case @@ -8933,7 +11215,24 @@ index 3f319dea0d8..213cec79c3d 100644 %type field_type %type named_struct_spec %type unnamed_struct_spec -@@ -6487,11 +6849,28 @@ struct_spec: +@@ -6280,6 +6804,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %type type + %type type_no_void + %type typedef_type ++%type resource_format + + %type state_block_list + %type type_spec +@@ -6416,7 +6941,7 @@ effect_group: + } + + buffer_declaration: +- var_modifiers buffer_type any_identifier colon_attribute annotations_opt ++ var_modifiers buffer_type any_identifier colon_attributes annotations_opt + { + if ($4.semantic.name) + hlsl_error(ctx, 
&@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); +@@ -6487,11 +7012,28 @@ struct_spec: | unnamed_struct_spec named_struct_spec: @@ -8964,7 +11263,7 @@ index 3f319dea0d8..213cec79c3d 100644 if (hlsl_get_var(ctx->cur_scope, $2)) { -@@ -6518,6 +6897,23 @@ any_identifier: +@@ -6518,6 +7060,23 @@ any_identifier: | TYPE_IDENTIFIER | NEW_IDENTIFIER @@ -8988,7 +11287,16 @@ index 3f319dea0d8..213cec79c3d 100644 fields_list: %empty { -@@ -6827,6 +7223,8 @@ func_prototype: +@@ -6707,7 +7266,7 @@ func_declaration: + + func_prototype_no_attrs: + /* var_modifiers is necessary to avoid shift/reduce conflicts. */ +- var_modifiers type var_identifier '(' parameters ')' colon_attribute ++ var_modifiers type var_identifier '(' parameters ')' colon_attributes + { + uint32_t modifiers = $1; + struct hlsl_ir_var *var; +@@ -6827,6 +7386,8 @@ func_prototype: func_prototype_no_attrs | attribute_list func_prototype_no_attrs { @@ -8997,7 +11305,160 @@ index 3f319dea0d8..213cec79c3d 100644 if ($2.first) { $2.decl->attr_count = $1.count; -@@ -7614,11 +8012,21 @@ stateblock_lhs_identifier: +@@ -6882,28 +7443,39 @@ var_identifier: + VAR_IDENTIFIER + | NEW_IDENTIFIER + +-colon_attribute: ++colon_attributes: + %empty + { + $$.semantic = (struct hlsl_semantic){0}; + $$.reg_reservation.reg_type = 0; + $$.reg_reservation.offset_type = 0; + } +- | semantic ++ | colon_attributes semantic + { +- $$.semantic = $1; +- $$.reg_reservation.reg_type = 0; +- $$.reg_reservation.offset_type = 0; ++ hlsl_cleanup_semantic(&$$.semantic); ++ $$.semantic = $2; + } +- | register_reservation ++ | colon_attributes register_reservation + { +- $$.semantic = (struct hlsl_semantic){0}; +- $$.reg_reservation = $1; ++ if ($$.reg_reservation.reg_type) ++ hlsl_fixme(ctx, &@2, "Multiple register() reservations."); ++ ++ $$.reg_reservation.reg_type = $2.reg_type; ++ $$.reg_reservation.reg_index = $2.reg_index; ++ $$.reg_reservation.reg_space = $2.reg_space; + } +- | packoffset_reservation 
++ | colon_attributes packoffset_reservation + { +- $$.semantic = (struct hlsl_semantic){0}; +- $$.reg_reservation = $1; ++ if (ctx->cur_buffer == ctx->globals_buffer) ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "The packoffset() reservation is only allowed within 'cbuffer' blocks."); ++ } ++ else ++ { ++ $$.reg_reservation.offset_type = $2.offset_type; ++ $$.reg_reservation.offset_index = $2.offset_index; ++ } + } + + semantic: +@@ -7099,7 +7671,7 @@ parameter: + } + + parameter_decl: +- var_modifiers type_no_void any_identifier arrays colon_attribute ++ var_modifiers type_no_void any_identifier arrays colon_attributes + { + uint32_t modifiers = $1; + struct hlsl_type *type; +@@ -7239,6 +7811,29 @@ rov_type: + $$ = HLSL_SAMPLER_DIM_3D; + } + ++so_type: ++ KW_POINTSTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_POINT_STREAM; ++ } ++ | KW_LINESTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_LINE_STREAM; ++ } ++ | KW_TRIANGLESTREAM ++ { ++ $$ = HLSL_STREAM_OUTPUT_TRIANGLE_STREAM; ++ } ++ ++resource_format: ++ var_modifiers type ++ { ++ uint32_t modifiers = $1; ++ ++ if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, false, &@1))) ++ YYABORT; ++ } ++ + type_no_void: + KW_VECTOR '<' type ',' C_INTEGER '>' + { +@@ -7332,18 +7927,18 @@ type_no_void: + { + $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); + } +- | texture_type '<' type '>' ++ | texture_type '<' resource_format '>' + { + validate_texture_format_type(ctx, $3, &@3); + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } +- | texture_ms_type '<' type '>' ++ | texture_ms_type '<' resource_format '>' + { + validate_texture_format_type(ctx, $3, &@3); + + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } +- | texture_ms_type '<' type ',' shift_expr '>' ++ | texture_ms_type '<' resource_format ',' shift_expr '>' + { + unsigned int sample_count; + struct hlsl_block block; +@@ -7359,16 +7954,28 @@ type_no_void: + + $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); + 
} +- | uav_type '<' type '>' ++ | KW_BYTEADDRESSBUFFER ++ { ++ $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0); ++ } ++ | uav_type '<' resource_format '>' + { + validate_uav_type(ctx, $1, $3, &@3); + $$ = hlsl_new_uav_type(ctx, $1, $3, false); + } +- | rov_type '<' type '>' ++ | rov_type '<' resource_format '>' + { +- validate_uav_type(ctx, $1, $3, &@3); ++ validate_uav_type(ctx, $1, $3, &@4); + $$ = hlsl_new_uav_type(ctx, $1, $3, true); + } ++ | so_type '<' type '>' ++ { ++ $$ = hlsl_new_stream_output_type(ctx, $1, $3); ++ } ++ | KW_RWBYTEADDRESSBUFFER ++ { ++ $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); ++ } + | KW_STRING + { + $$ = ctx->builtin_types.string; +@@ -7587,7 +8194,7 @@ variables_def_typed: + } + + variable_decl: +- any_identifier arrays colon_attribute annotations_opt ++ any_identifier arrays colon_attributes annotations_opt + { + $$ = hlsl_alloc(ctx, sizeof(*$$)); + $$->loc = @1; +@@ -7614,11 +8221,21 @@ stateblock_lhs_identifier: if (!($$ = hlsl_strdup(ctx, "pixelshader"))) YYABORT; } @@ -9019,7 +11480,7 @@ index 3f319dea0d8..213cec79c3d 100644 state_block_index_opt: %empty -@@ -7666,7 +8074,7 @@ state_block: +@@ -7666,7 +8283,7 @@ state_block: vkd3d_free($5.args); $$ = $1; @@ -9028,7 +11489,7 @@ index 3f319dea0d8..213cec79c3d 100644 } | state_block any_identifier '(' func_arguments ')' ';' { -@@ -7694,7 +8102,7 @@ state_block: +@@ -7694,7 +8311,7 @@ state_block: hlsl_validate_state_block_entry(ctx, entry, &@4); $$ = $1; @@ -9037,7 +11498,22 @@ index 3f319dea0d8..213cec79c3d 100644 } state_block_list: -@@ -7931,6 +8339,7 @@ complex_initializer: +@@ -7906,6 +8523,14 @@ var_modifiers: + { + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); + } ++ | KW_UNORM var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_UNORM, &@1); ++ } ++ | KW_SNORM var_modifiers ++ { ++ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, 
&@1); ++ } + | var_identifier var_modifiers + { + $$ = $2; +@@ -7931,6 +8556,7 @@ complex_initializer: $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; @@ -9045,7 +11521,7 @@ index 3f319dea0d8..213cec79c3d 100644 } | '{' complex_initializer_list '}' { -@@ -7962,6 +8371,7 @@ complex_initializer_list: +@@ -7962,6 +8588,7 @@ complex_initializer_list: $$.args[$$.args_count++] = $3.args[i]; hlsl_block_add_block($$.instrs, $3.instrs); free_parse_initializer(&$3); @@ -9053,7 +11529,7 @@ index 3f319dea0d8..213cec79c3d 100644 } initializer_expr: -@@ -7979,6 +8389,7 @@ initializer_expr_list: +@@ -7979,6 +8606,7 @@ initializer_expr_list: $$.args[0] = node_from_block($1); $$.instrs = $1; $$.braces = false; @@ -9061,7 +11537,7 @@ index 3f319dea0d8..213cec79c3d 100644 } | initializer_expr_list ',' initializer_expr { -@@ -8092,8 +8503,7 @@ selection_statement: +@@ -8092,8 +8720,7 @@ selection_statement: struct hlsl_ir_node *instr; unsigned int i; @@ -9071,7 +11547,7 @@ index 3f319dea0d8..213cec79c3d 100644 for (i = 0; i < attributes->count; ++i) { -@@ -8298,6 +8708,7 @@ func_arguments: +@@ -8298,6 +8925,7 @@ func_arguments: if (!($$.instrs = make_empty_block(ctx))) YYABORT; $$.braces = false; @@ -9079,7 +11555,7 @@ index 3f319dea0d8..213cec79c3d 100644 } | initializer_expr_list -@@ -8391,6 +8802,34 @@ primary_expr: +@@ -8391,6 +9019,34 @@ primary_expr: { $$ = $2; } @@ -9114,7 +11590,7 @@ index 3f319dea0d8..213cec79c3d 100644 | var_identifier '(' func_arguments ')' { if (!($$ = add_call(ctx, $1, &$3, &@1))) -@@ -8400,6 +8839,25 @@ primary_expr: +@@ -8400,6 +9056,25 @@ primary_expr: } vkd3d_free($1); } @@ -9140,7 +11616,7 @@ index 3f319dea0d8..213cec79c3d 100644 | NEW_IDENTIFIER { if (ctx->in_state_block) -@@ -8416,7 +8874,11 @@ primary_expr: +@@ -8416,7 +9091,11 @@ primary_expr: else { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Identifier \"%s\" is not declared.", $1); @@ -9153,7 +11629,7 @@ index 3f319dea0d8..213cec79c3d 100644 } } -@@ -8446,25 
+8908,12 @@ postfix_expr: +@@ -8446,46 +9125,34 @@ postfix_expr: if (node->data_type->class == HLSL_CLASS_STRUCT) { @@ -9162,42 +11638,57 @@ index 3f319dea0d8..213cec79c3d 100644 - unsigned int field_idx = 0; - - if (!(field = get_struct_field(type->e.record.fields, type->e.record.field_count, $3))) -- { -- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); -- vkd3d_free($3); -- YYABORT; -- } -- -- field_idx = field - type->e.record.fields; -- if (!add_record_access(ctx, $1, node, field_idx, &@2)) + if (!add_record_access_recurse(ctx, $1, $3, &@2)) { +- hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", $3); ++ destroy_block($1); vkd3d_free($3); YYABORT; } - vkd3d_free($3); +- +- field_idx = field - type->e.record.fields; +- if (!add_record_access(ctx, $1, node, field_idx, &@2)) +- { +- vkd3d_free($3); +- YYABORT; +- } +- vkd3d_free($3); - $$ = $1; } else if (hlsl_is_numeric_type(node->data_type)) { -@@ -8478,14 +8927,14 @@ postfix_expr: + struct hlsl_ir_node *swizzle; + +- if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) ++ if ((swizzle = get_swizzle(ctx, node, $3, &@3))) ++ { ++ hlsl_block_add_instr($1, swizzle); ++ } ++ else + { + hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); +- vkd3d_free($3); +- YYABORT; ++ $1->value = ctx->error_instr; } - hlsl_block_add_instr($1, swizzle); - vkd3d_free($3); +- hlsl_block_add_instr($1, swizzle); +- vkd3d_free($3); - $$ = $1; } - else + else if (node->data_type->class != HLSL_CLASS_ERROR) { hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); - vkd3d_free($3); - YYABORT; +- vkd3d_free($3); +- YYABORT; ++ $1->value = ctx->error_instr; } ++ vkd3d_free($3); + $$ = $1; } | postfix_expr '[' expr ']' { -@@ -8523,14 +8972,6 @@ postfix_expr: +@@ -8523,14 +9190,6 @@ postfix_expr: free_parse_initializer(&$4); YYABORT; } @@ -9212,7 +11703,7 @@ index 
3f319dea0d8..213cec79c3d 100644 if (!($$ = add_constructor(ctx, $2, &$4, &@2))) { -@@ -8597,10 +9038,6 @@ unary_expr: +@@ -8597,10 +9256,6 @@ unary_expr: /* var_modifiers is necessary to avoid shift/reduce conflicts. */ | '(' var_modifiers type arrays ')' unary_expr { @@ -9223,7 +11714,7 @@ index 3f319dea0d8..213cec79c3d 100644 if ($2) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -@@ -8608,36 +9045,13 @@ unary_expr: +@@ -8608,36 +9263,13 @@ unary_expr: YYABORT; } @@ -9264,21 +11755,31 @@ index 3f319dea0d8..213cec79c3d 100644 } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 92b5c71c43f..88bec8610cb 100644 +index 92b5c71c43f..1fbf670f032 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -276,8 +276,8 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls +@@ -19,6 +19,7 @@ + */ + + #include "hlsl.h" ++#include "vkd3d_shader_private.h" + #include + #include + +@@ -276,9 +277,9 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls == base_type_get_semantic_equivalent(type2->e.numeric.type); } -static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, - struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, +- uint32_t index, bool output, const struct vkd3d_shader_location *loc) +static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, + struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, - uint32_t index, bool output, const struct vkd3d_shader_location *loc) ++ uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc) { struct hlsl_semantic new_semantic; -@@ -287,7 +287,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + struct hlsl_ir_var *ext_var; +@@ -287,7 
+288,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", output ? "output" : "input", semantic->name, index))) return NULL; @@ -9287,9 +11788,11 @@ index 92b5c71c43f..88bec8610cb 100644 { if (!ascii_strcasecmp(ext_var->name, new_name)) { -@@ -339,12 +339,12 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir +@@ -338,14 +339,32 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + else ext_var->is_input_semantic = 1; ext_var->is_param = var->is_param; ++ ext_var->force_align = force_align; list_add_before(&var->scope_entry, &ext_var->scope_entry); - list_add_tail(&ctx->extern_vars, &ext_var->extern_entry); + list_add_tail(&func->extern_vars, &ext_var->extern_entry); @@ -9298,42 +11801,110 @@ index 92b5c71c43f..88bec8610cb 100644 } -static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t field_modifiers) ++{ ++ field_modifiers |= modifiers; ++ ++ /* TODO: 'sample' modifier is not supported yet. */ ++ ++ /* 'nointerpolation' always takes precedence, next the same is done for ++ * 'sample', remaining modifiers are combined. 
*/ ++ if (field_modifiers & HLSL_STORAGE_NOINTERPOLATION) ++ { ++ field_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; ++ field_modifiers |= HLSL_STORAGE_NOINTERPOLATION; ++ } ++ ++ return field_modifiers; ++} ++ +static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++ uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) { struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; -@@ -375,7 +375,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + struct vkd3d_shader_location *loc = &lhs->node.loc; +@@ -369,14 +388,17 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); + ++ if (hlsl_type_major_size(type) > 1) ++ force_align = true; ++ + for (i = 0; i < hlsl_type_major_size(type); ++i) + { + struct hlsl_ir_node *store, *cast; struct hlsl_ir_var *input; struct hlsl_ir_load *load; - if (!(input = add_semantic_var(ctx, var, vector_type_src, modifiers, semantic, -+ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, - semantic_index + i, false, loc))) +- semantic_index + i, false, loc))) ++ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, ++ modifiers, semantic, semantic_index + i, false, force_align, loc))) return; -@@ -408,8 +408,8 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) +@@ -408,8 +430,9 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s } } -static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *lhs, - 
uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, ++ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) { struct vkd3d_shader_location *loc = &lhs->node.loc; struct hlsl_type *type = lhs->node.data_type; -@@ -466,30 +466,30 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * +@@ -425,12 +448,14 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + + for (i = 0; i < hlsl_type_element_count(type); ++i) + { +- uint32_t element_modifiers = modifiers; ++ uint32_t element_modifiers; + + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; ++ element_modifiers = modifiers; ++ force_align = true; + } + else + { +@@ -444,17 +469,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; +- element_modifiers |= field->storage_modifiers; +- +- /* TODO: 'sample' modifier is not supported yet */ +- +- /* 'nointerpolation' always takes precedence, next the same is done for 'sample', +- remaining modifiers are combined. 
*/ +- if (element_modifiers & HLSL_STORAGE_NOINTERPOLATION) +- { +- element_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; +- element_modifiers |= HLSL_STORAGE_NOINTERPOLATION; +- } ++ element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); ++ force_align = (i == 0); + } + + if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) +@@ -466,31 +482,33 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * return; list_add_after(&c->entry, &element_load->node.entry); - prepend_input_copy_recurse(ctx, block, element_load, element_modifiers, semantic, elem_semantic_index); -+ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); ++ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, ++ semantic, elem_semantic_index, force_align); } } else { - prepend_input_copy(ctx, block, lhs, modifiers, semantic, semantic_index); -+ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); ++ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align); } } @@ -9351,21 +11922,33 @@ index 92b5c71c43f..88bec8610cb 100644 + list_add_head(&func->body.instrs, &load->node.entry); - prepend_input_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); } -static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) +- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void append_output_copy(struct 
hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ struct hlsl_ir_load *rhs, uint32_t modifiers, ++ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) { struct hlsl_type *type = rhs->node.data_type, *vector_type; -@@ -517,18 +517,19 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + struct vkd3d_shader_location *loc = &rhs->node.loc; +@@ -511,24 +529,28 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + + vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); + ++ if (hlsl_type_major_size(type) > 1) ++ force_align = true; ++ + for (i = 0; i < hlsl_type_major_size(type); ++i) + { + struct hlsl_ir_node *store; struct hlsl_ir_var *output; struct hlsl_ir_load *load; - if (!(output = add_semantic_var(ctx, var, vector_type, modifiers, semantic, semantic_index + i, true, loc))) + if (!(output = add_semantic_var(ctx, func, var, vector_type, -+ modifiers, semantic, semantic_index + i, true, loc))) ++ modifiers, semantic, semantic_index + i, true, force_align, loc))) return; if (type->class == HLSL_CLASS_MATRIX) @@ -9382,7 +11965,7 @@ index 92b5c71c43f..88bec8610cb 100644 } else { -@@ -536,17 +537,17 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s +@@ -536,17 +558,18 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) return; @@ -9399,12 +11982,34 @@ index 92b5c71c43f..88bec8610cb 100644 -static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_load *rhs, - uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) ++static void 
append_output_copy_recurse(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, ++ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) { struct vkd3d_shader_location *loc = &rhs->node.loc; struct hlsl_type *type = rhs->node.data_type; -@@ -580,34 +581,34 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * +@@ -562,10 +585,14 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + + for (i = 0; i < hlsl_type_element_count(type); ++i) + { ++ uint32_t element_modifiers; ++ + if (type->class == HLSL_CLASS_ARRAY) + { + elem_semantic_index = semantic_index + + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; ++ element_modifiers = modifiers; ++ force_align = true; + } + else + { +@@ -576,38 +603,41 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block * + semantic = &field->semantic; + elem_semantic_index = semantic->index; + loc = &field->loc; ++ element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); ++ force_align = (i == 0); + } if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) return; @@ -9417,13 +12022,14 @@ index 92b5c71c43f..88bec8610cb 100644 + hlsl_block_add_instr(&func->body, &element_load->node); - append_output_copy_recurse(ctx, block, element_load, modifiers, semantic, elem_semantic_index); -+ append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); ++ append_output_copy_recurse(ctx, func, element_load, element_modifiers, ++ semantic, elem_semantic_index, force_align); } } else { - append_output_copy(ctx, block, rhs, modifiers, semantic, semantic_index); -+ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); ++ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align); } } @@ -9442,11 +12048,18 @@ index 92b5c71c43f..88bec8610cb 100644 
+ hlsl_block_add_instr(&func->body, &load->node); - append_output_copy_recurse(ctx, block, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); ++ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); } bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -@@ -1655,11 +1656,16 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, +@@ -1649,17 +1679,23 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + case HLSL_CLASS_MATRIX: case HLSL_CLASS_ARRAY: case HLSL_CLASS_STRUCT: @@ -9466,7 +12079,50 @@ index 92b5c71c43f..88bec8610cb 100644 case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: -@@ -4050,12 +4056,14 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -4045,17 +4081,57 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + return true; + } + ++static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *cond, *cond_cast, *abs, *neg; ++ struct hlsl_type *float_type; ++ struct hlsl_ir_jump *jump; ++ struct hlsl_block block; ++ ++ if (instr->type != HLSL_IR_JUMP) ++ return false; ++ jump = hlsl_ir_jump(instr); ++ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) ++ return false; ++ ++ cond = jump->condition.node; ++ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); ++ ++ hlsl_block_init(&block); ++ ++ if (!(cond_cast = hlsl_new_cast(ctx, cond, float_type, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, cond_cast); ++ ++ if (!(abs = 
hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond_cast, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, abs); ++ ++ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(&block, neg); ++ ++ list_move_tail(&instr->entry, &block.instrs); ++ hlsl_src_remove(&jump->condition); ++ hlsl_src_from_node(&jump->condition, neg); ++ jump->type = HLSL_IR_JUMP_DISCARD_NEG; ++ ++ return true; ++} ++ + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { switch (instr->type) { case HLSL_IR_CONSTANT: @@ -9481,7 +12137,17 @@ index 92b5c71c43f..88bec8610cb 100644 if (list_empty(&instr->uses)) { list_remove(&instr->entry); -@@ -4106,7 +4114,7 @@ static void dump_function(struct rb_entry *entry, void *context) +@@ -4088,6 +4164,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. 
*/ ++ vkd3d_unreachable(); + } + + return false; +@@ -4106,7 +4185,7 @@ static void dump_function(struct rb_entry *entry, void *context) } } @@ -9490,11 +12156,11 @@ index 92b5c71c43f..88bec8610cb 100644 struct hlsl_ir_node *instr) { if (!deref->rel_offset.node) -@@ -4119,6 +4127,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, +@@ -4119,6 +4198,20 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, return true; } -+void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ + struct hlsl_scope *scope; + struct hlsl_ir_var *var; @@ -9511,7 +12177,7 @@ index 92b5c71c43f..88bec8610cb 100644 static char get_regset_name(enum hlsl_regset regset) { switch (regset) -@@ -4135,11 +4157,11 @@ static char get_regset_name(enum hlsl_regset regset) +@@ -4135,11 +4228,11 @@ static char get_regset_name(enum hlsl_regset regset) vkd3d_unreachable(); } @@ -9525,7 +12191,17 @@ index 92b5c71c43f..88bec8610cb 100644 { const struct hlsl_reg_reservation *reservation = &var->reg_reservation; unsigned int r; -@@ -4337,11 +4359,23 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop +@@ -4213,6 +4306,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + case HLSL_IR_STATEBLOCK_CONSTANT: + /* Stateblock constants should not appear in the shader program. */ + vkd3d_unreachable(); ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. 
*/ ++ vkd3d_unreachable(); + + case HLSL_IR_STORE: + { +@@ -4337,10 +4433,22 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop case HLSL_IR_CONSTANT: case HLSL_IR_STRING_CONSTANT: break; @@ -9537,7 +12213,6 @@ index 92b5c71c43f..88bec8610cb 100644 } } --static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +static void init_var_liveness(struct hlsl_ir_var *var) +{ + if (var->is_uniform || var->is_input_semantic) @@ -9546,11 +12221,10 @@ index 92b5c71c43f..88bec8610cb 100644 + var->last_read = UINT_MAX; +} + -+void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct hlsl_scope *scope; - struct hlsl_ir_var *var; -@@ -4355,16 +4389,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl +@@ -4355,16 +4463,29 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl } LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) @@ -9586,7 +12260,219 @@ index 92b5c71c43f..88bec8610cb 100644 struct register_allocator { struct allocation -@@ -4816,7 +4863,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, +@@ -4372,6 +4493,9 @@ struct register_allocator + uint32_t reg; + unsigned int writemask; + unsigned int first_write, last_read; ++ ++ /* Two allocations with different mode can't share the same register. */ ++ int mode; + } *allocations; + size_t count, capacity; + +@@ -4381,10 +4505,17 @@ struct register_allocator + + /* Total number of registers allocated so far. Used to declare sm4 temp count. */ + uint32_t reg_count; ++ ++ /* Special flag so allocations that can share registers prioritize those ++ * that will result in smaller writemasks. 
++ * For instance, a single-register allocation would prefer to share a register ++ * whose .xy components are already allocated (becoming .z) instead of a ++ * register whose .xyz components are already allocated (becoming .w). */ ++ bool prioritize_smaller_writemasks; + }; + + static unsigned int get_available_writemask(const struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_idx) ++ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) + { + unsigned int writemask = VKD3DSP_WRITEMASK_ALL; + size_t i; +@@ -4399,7 +4530,11 @@ static unsigned int get_available_writemask(const struct register_allocator *all + + if (allocation->reg == reg_idx + && first_write < allocation->last_read && last_read > allocation->first_write) ++ { + writemask &= ~allocation->writemask; ++ if (allocation->mode != mode) ++ writemask = 0; ++ } + + if (!writemask) + break; +@@ -4408,8 +4543,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all + return writemask; + } + +-static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) ++static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, ++ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) + { + struct allocation *allocation; + +@@ -4422,6 +4557,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a + allocation->writemask = writemask; + allocation->first_write = first_write; + allocation->last_read = last_read; ++ allocation->mode = mode; + + allocator->reg_count = max(allocator->reg_count, reg_idx + 1); + } +@@ -4431,37 +4567,46 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a + * register, even if they don't use it completely. 
*/ + static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, unsigned int reg_size, +- unsigned int component_count) ++ unsigned int component_count, int mode, bool force_align) + { +- struct hlsl_reg ret = {0}; +- unsigned int writemask; +- uint32_t reg_idx; ++ struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; ++ unsigned int required_size = force_align ? 4 : reg_size; ++ unsigned int pref; + + VKD3D_ASSERT(component_count <= reg_size); + +- for (reg_idx = 0;; ++reg_idx) ++ pref = allocator->prioritize_smaller_writemasks ? 4 : required_size; ++ for (; pref >= required_size; --pref) + { +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); +- +- if (vkd3d_popcount(writemask) >= reg_size) ++ for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) + { +- writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); +- break; ++ unsigned int available_writemask = get_available_writemask(allocator, ++ first_write, last_read, reg_idx, mode); ++ ++ if (vkd3d_popcount(available_writemask) >= pref) ++ { ++ unsigned int writemask = hlsl_combine_writemasks(available_writemask, ++ vkd3d_write_mask_from_component_count(reg_size)); ++ ++ ret.id = reg_idx; ++ ret.writemask = hlsl_combine_writemasks(writemask, ++ vkd3d_write_mask_from_component_count(component_count)); ++ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); ++ return ret; ++ } + } + } + +- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); +- +- ret.id = reg_idx; +- ret.allocation_size = 1; +- ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); +- ret.allocated = true; ++ ret.id = allocator->reg_count; ++ ret.writemask = vkd3d_write_mask_from_component_count(component_count); ++ record_allocation(ctx, allocator, allocator->reg_count, ++ 
vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); + return ret; + } + + /* Allocate a register with writemask, while reserving reg_writemask. */ + static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) ++ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4470,11 +4615,12 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + + for (reg_idx = 0;; ++reg_idx) + { +- if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) ++ if ((get_available_writemask(allocator, first_write, last_read, ++ reg_idx, mode) & reg_writemask) == reg_writemask) + break; + } + +- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); ++ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); + + ret.id = reg_idx; + ret.allocation_size = 1; +@@ -4483,8 +4629,8 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct + return ret; + } + +-static bool is_range_available(const struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) ++static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, ++ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) + { + unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; + unsigned int writemask; +@@ -4492,18 +4638,18 @@ static bool is_range_available(const struct register_allocator *allocator, + + for (i = 0; i < (reg_size / 4); ++i) + { +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i); ++ writemask = 
get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); + if (writemask != VKD3DSP_WRITEMASK_ALL) + return false; + } +- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4)); ++ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); + if ((writemask & last_reg_mask) != last_reg_mask) + return false; + return true; + } + + static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, +- unsigned int first_write, unsigned int last_read, unsigned int reg_size) ++ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) + { + struct hlsl_reg ret = {0}; + uint32_t reg_idx; +@@ -4511,14 +4657,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo + + for (reg_idx = 0;; ++reg_idx) + { +- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) ++ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) + break; + } + + for (i = 0; i < reg_size / 4; ++i) +- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); ++ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); + if (reg_size % 4) +- record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, first_write, last_read); ++ record_allocation(ctx, allocator, reg_idx + (reg_size / 4), ++ (1u << (reg_size % 4)) - 1, first_write, last_read, mode); + + ret.id = reg_idx; + ret.allocation_size = align(reg_size, 4) / 4; +@@ -4534,9 +4681,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ + + if (type->class <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); ++ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); + else +- return allocate_range(ctx, allocator, first_write, last_read, reg_size); ++ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); + } + + static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) +@@ -4715,7 +4862,7 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + + if (reg_writemask) + instr->reg = allocate_register_with_masks(ctx, allocator, +- instr->index, instr->last_read, reg_writemask, dst_writemask); ++ instr->index, instr->last_read, reg_writemask, dst_writemask, 0); + else + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, + instr->index, instr->last_read, instr->data_type); +@@ -4816,7 +4963,8 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } } @@ -9596,7 +12482,7 @@ index 92b5c71c43f..88bec8610cb 100644 { struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_constant_register *reg; -@@ -4838,6 +4886,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, +@@ -4838,6 +4986,7 @@ static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, memset(reg, 0, sizeof(*reg)); reg->index = component_index / 4; reg->value.f[component_index % 4] = f; @@ -9604,7 +12490,7 @@ index 92b5c71c43f..88bec8610cb 100644 } static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, -@@ -4898,7 +4947,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, +@@ -4898,7 +5047,7 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, vkd3d_unreachable(); } @@ -9613,7 +12499,7 @@ index 92b5c71c43f..88bec8610cb 100644 } break; -@@ -4991,17 +5040,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct 
hlsl_bl +@@ -4991,17 +5140,17 @@ static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_bl ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); @@ -9639,12 +12525,39 @@ index 92b5c71c43f..88bec8610cb 100644 return; } -@@ -5081,9 +5130,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi +@@ -5034,14 +5183,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + { + if (i < bind_count) + { +- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) ++ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } +- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); ++ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + } +- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); ++ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); + } + + var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; +@@ -5064,7 +5213,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + + if (!var->regs[HLSL_REGSET_NUMERIC].allocated) + { +- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); + TRACE("Allocated %s to %s.\n", var->name, + debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); + } +@@ -5081,9 +5230,21 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi * index to all (simultaneously 
live) variables or intermediate values. Agnostic * as to how many registers are actually available for the current backend, and * does not handle constants. */ -static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) ++static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator = {0}; + struct hlsl_scope *scope; @@ -9662,7 +12575,7 @@ index 92b5c71c43f..88bec8610cb 100644 /* ps_1_* outputs are special and go in temp register 0. */ if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -5092,8 +5153,7 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio +@@ -5092,22 +5253,53 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio for (i = 0; i < entry_func->parameters.count; ++i) { @@ -9671,8 +12584,11 @@ index 92b5c71c43f..88bec8610cb 100644 + var = entry_func->parameters.vars[i]; if (var->is_output_semantic) { - record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); -@@ -5103,11 +5163,13 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio +- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); ++ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); + break; + } + } } allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); @@ -9680,15 +12596,45 @@ index 92b5c71c43f..88bec8610cb 100644 vkd3d_free(allocator.allocations); + + return allocator.reg_count; ++} ++ ++enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) ++{ ++ unsigned int i; ++ ++ static const struct ++ { ++ unsigned int modifiers; ++ enum 
vkd3d_shader_interpolation_mode mode; ++ } ++ modes[] = ++ { ++ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID}, ++ {HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE}, ++ {HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID}, ++ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID}, ++ }; ++ ++ if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) ++ || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT) ++ return VKD3DSIM_CONSTANT; ++ ++ for (i = 0; i < ARRAY_SIZE(modes); ++i) ++ { ++ if ((storage_modifiers & modes[i].modifiers) == modes[i].modifiers) ++ return modes[i].mode; ++ } ++ ++ return VKD3DSIM_LINEAR; } -static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) +static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -+ unsigned int *counter, bool output, bool is_patch_constant_func) ++ struct register_allocator *allocator, bool output, bool optimize, bool is_patch_constant_func) { static const char *const shader_names[] = { -@@ -5120,27 +5182,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5120,27 +5312,28 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var }; enum vkd3d_shader_register_type type; @@ -9726,7 +12672,7 @@ index 92b5c71c43f..88bec8610cb 100644 { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); -@@ -5152,22 +5215,24 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var +@@ -5152,50 +5345,72 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var } else { @@ -9746,6 +12692,13 @@ index 92b5c71c43f..88bec8610cb 100644 + + if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) reg = has_idx ? 
var->semantic.index : 0; ++ ++ if (semantic == VKD3D_SHADER_SV_TESS_FACTOR_TRIINT) ++ { ++ /* While SV_InsideTessFactor can be declared as 'float' for "tri" ++ * domains, it is allocated as if it was 'float[1]'. */ ++ var->force_align = true; ++ } } if (builtin) @@ -9755,27 +12708,51 @@ index 92b5c71c43f..88bec8610cb 100644 output ? "output" : "input", var->semantic.name, var->semantic.index, type, reg); } else -@@ -5181,21 +5246,23 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + { +- var->regs[HLSL_REGSET_NUMERIC].allocated = true; +- var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; +- var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; +- var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; +- TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', +- var->regs[HLSL_REGSET_NUMERIC], var->data_type)); ++ int mode = (ctx->profile->major_version < 4) ++ ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); ++ unsigned int reg_size = optimize ? var->data_type->dimx : 4; ++ ++ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, ++ UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); ++ ++ TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 
'o' : 'v', ++ var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); } } -static void allocate_semantic_registers(struct hlsl_ctx *ctx) +static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { +- unsigned int input_counter = 0, output_counter = 0; ++ struct register_allocator input_allocator = {0}, output_allocator = {0}; ++ bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; ++ bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; + bool is_patch_constant_func = entry_func == ctx->patch_constant_func; - unsigned int input_counter = 0, output_counter = 0; struct hlsl_ir_var *var; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ input_allocator.prioritize_smaller_writemasks = true; ++ output_allocator.prioritize_smaller_writemasks = true; ++ + LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_input_semantic) - allocate_semantic_register(ctx, var, &input_counter, false); -+ allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); ++ allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader, is_patch_constant_func); if (var->is_output_semantic) - allocate_semantic_register(ctx, var, &output_counter, true); -+ allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); ++ allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader, is_patch_constant_func); } ++ ++ vkd3d_free(input_allocator.allocations); ++ vkd3d_free(output_allocator.allocations); } -static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) @@ -9784,7 +12761,7 @@ index 92b5c71c43f..88bec8610cb 100644 { const struct hlsl_buffer *buffer; -@@ -5203,7 +5270,12 @@ static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint3 +@@ -5203,7 +5418,12 @@ static const struct hlsl_buffer 
*get_reserved_buffer(struct hlsl_ctx *ctx, uint3 { if (buffer->reservation.reg_type == 'b' && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) @@ -9797,7 +12774,7 @@ index 92b5c71c43f..88bec8610cb 100644 } return NULL; } -@@ -5260,7 +5332,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va +@@ -5260,7 +5480,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name); buffer->size = max(buffer->size, var->buffer_offset + var_reg_size); @@ -9806,7 +12783,7 @@ index 92b5c71c43f..88bec8610cb 100644 buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size); } -@@ -5386,8 +5458,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -5386,8 +5606,8 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (reservation->reg_type == 'b') { @@ -9817,7 +12794,7 @@ index 92b5c71c43f..88bec8610cb 100644 unsigned int max_index = get_max_cbuffer_reg_index(ctx); if (buffer->reservation.reg_index > max_index) -@@ -5395,14 +5467,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -5395,14 +5615,14 @@ static void allocate_buffers(struct hlsl_ctx *ctx) "Buffer reservation cb%u exceeds target's maximum (cb%u).", buffer->reservation.reg_index, max_index); @@ -9835,7 +12812,7 @@ index 92b5c71c43f..88bec8610cb 100644 } buffer->reg.space = reservation->reg_space; -@@ -5419,12 +5491,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) +@@ -5419,12 +5639,12 @@ static void allocate_buffers(struct hlsl_ctx *ctx) else if (!reservation->reg_type) { unsigned int max_index = get_max_cbuffer_reg_index(ctx); @@ -9850,7 +12827,7 @@ index 92b5c71c43f..88bec8610cb 100644 buffer->reg.space = 0; buffer->reg.index = index; -@@ -5491,15 +5563,15 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum +@@ -5491,15 +5711,15 @@ static const struct hlsl_ir_var 
*get_allocated_object(struct hlsl_ctx *ctx, enum return NULL; } @@ -9869,7 +12846,7 @@ index 92b5c71c43f..88bec8610cb 100644 { if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color") || !ascii_strcasecmp(var->semantic.name, "sv_target"))) -@@ -5786,6 +5858,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere +@@ -5786,6 +6006,26 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere return ret; } @@ -9896,24 +12873,44 @@ index 92b5c71c43f..88bec8610cb 100644 static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) { unsigned int i; -@@ -5834,6 +5926,263 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a +@@ -5834,207 +6074,2961 @@ static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_a } } +-static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) +static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -+{ + { +- struct hlsl_ir_node *instr, *next; +- struct hlsl_block block; +- struct list *start; + const char *value; -+ + +- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry) + if (attr->args_count != 1) -+ { + { +- if (instr->type == HLSL_IR_IF) +- { +- struct hlsl_ir_if *iff = hlsl_ir_if(instr); + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [domain] attribute, but got %u.", attr->args_count); + return; + } -+ + +- remove_unreachable_code(ctx, &iff->then_block); +- remove_unreachable_code(ctx, &iff->else_block); +- } +- else if (instr->type == HLSL_IR_LOOP) +- { +- struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; -+ + +- remove_unreachable_code(ctx, &loop->body); +- } +- else if (instr->type == HLSL_IR_SWITCH) +- { +- struct hlsl_ir_switch *s = hlsl_ir_switch(instr); +- 
struct hlsl_ir_switch_case *c; + if (!strcmp(value, "isoline")) + ctx->domain = VKD3D_TESSELLATOR_DOMAIN_LINE; + else if (!strcmp(value, "tri")) @@ -9925,28 +12922,48 @@ index 92b5c71c43f..88bec8610cb 100644 + "Invalid tessellator domain \"%s\": expected \"isoline\", \"tri\", or \"quad\".", + value); +} -+ + +- LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) +- { +- remove_unreachable_code(ctx, &c->body); +- } +- } +- } +static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) +{ + const struct hlsl_ir_node *instr; + const struct hlsl_type *type; + const struct hlsl_ir_constant *constant; -+ + +- /* Remove instructions past unconditional jumps. */ +- LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry) + if (attr->args_count != 1) -+ { + { +- struct hlsl_ir_jump *jump; +- +- if (instr->type != HLSL_IR_JUMP) +- continue; + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [outputcontrolpoints] attribute, but got %u.", attr->args_count); + return; + } -+ + +- jump = hlsl_ir_jump(instr); +- if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE) +- continue; + instr = attr->args[0].node; + type = instr->data_type; -+ + +- if (!(start = list_next(&body->instrs, &instr->entry))) +- break; + if (type->class != HLSL_CLASS_SCALAR + || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT)) + { + struct vkd3d_string_buffer *string; -+ + +- hlsl_block_init(&block); +- list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs)); +- hlsl_block_cleanup(&block); + if ((string = hlsl_type_to_string(ctx, type))) + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong type for argument 0 of [outputcontrolpoints]: expected int or uint, but got %s.", @@ -9954,12 +12971,13 @@ index 92b5c71c43f..88bec8610cb 100644 + hlsl_release_string_buffer(ctx, string); + return; + } 
-+ + +- break; + if (instr->type != HLSL_IR_CONSTANT) + { + hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [outputcontrolpoints] initializer."); + return; -+ } + } + constant = hlsl_ir_constant(instr); + + if ((type->e.numeric.type == HLSL_TYPE_INT && constant->value.u[0].i < 0) @@ -9968,22 +12986,52 @@ index 92b5c71c43f..88bec8610cb 100644 + "Output control point count must be between 0 and 32."); + + ctx->output_control_point_count = constant->value.u[0].u; -+} -+ + } + +-void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) +static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -+{ + { +- bool progress; +- +- lower_ir(ctx, lower_matrix_swizzles, body); +- lower_ir(ctx, lower_index_loads, body); + const char *value; -+ + +- lower_ir(ctx, lower_broadcasts, body); +- while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); +- do + if (attr->args_count != 1) -+ { + { +- progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); +- progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected 1 parameter for [outputtopology] attribute, but got %u.", attr->args_count); + return; -+ } -+ + } +- while (progress); +- hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); + +- lower_ir(ctx, lower_narrowing_casts, body); +- lower_ir(ctx, lower_int_dot, body); +- lower_ir(ctx, lower_int_division, body); +- lower_ir(ctx, lower_int_modulus, body); +- lower_ir(ctx, lower_int_abs, body); +- lower_ir(ctx, lower_casts_to_bool, body); +- lower_ir(ctx, lower_float_modulus, body); +- hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); + if (!(value = get_string_argument_value(ctx, attr, 0))) + return; -+ + +- do +- { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); +- progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); +- progress |= 
hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, body); +- progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); +- progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); +- progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); +- } while (progress); + if (!strcmp(value, "point")) + ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT; + else if (!strcmp(value, "line")) @@ -9996,12 +13044,20 @@ index 92b5c71c43f..88bec8610cb 100644 + hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE, + "Invalid tessellator output topology \"%s\": " + "expected \"point\", \"line\", \"triangle_cw\", or \"triangle_ccw\".", value); -+} -+ + } + +-static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, +- struct vsir_program *program, bool output, struct hlsl_ir_var *var) +static void parse_partitioning_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr) -+{ + { +- enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; +- enum vkd3d_shader_register_type type; +- struct shader_signature *signature; +- struct signature_element *element; +- unsigned int register_index, mask; + const char *value; -+ + +- if ((!output && !var->last_read) || (output && !var->first_write)) + if (attr->args_count != 1) + { + hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, @@ -10157,57 +13213,121 @@ index 92b5c71c43f..88bec8610cb 100644 + } +} + - static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - struct hlsl_ir_node *instr, *next; -@@ -5890,12 +6239,16 @@ static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *bod - } - } - ++static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body) ++{ ++ struct hlsl_ir_node *instr, *next; ++ struct hlsl_block block; ++ struct list *start; ++ ++ 
LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ remove_unreachable_code(ctx, &iff->then_block); ++ remove_unreachable_code(ctx, &iff->else_block); ++ } ++ else if (instr->type == HLSL_IR_LOOP) ++ { ++ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); ++ ++ remove_unreachable_code(ctx, &loop->body); ++ } ++ else if (instr->type == HLSL_IR_SWITCH) ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ { ++ remove_unreachable_code(ctx, &c->body); ++ } ++ } ++ } ++ ++ /* Remove instructions past unconditional jumps. */ ++ LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry) ++ { ++ struct hlsl_ir_jump *jump; ++ ++ if (instr->type != HLSL_IR_JUMP) ++ continue; ++ ++ jump = hlsl_ir_jump(instr); ++ if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE) ++ continue; ++ ++ if (!(start = list_next(&body->instrs, &instr->entry))) ++ break; ++ ++ hlsl_block_init(&block); ++ list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs)); ++ hlsl_block_cleanup(&block); ++ ++ break; ++ } ++} ++ +void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body) +{ + lower_ir(ctx, lower_index_loads, body); +} + - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - { - bool progress; - - lower_ir(ctx, lower_matrix_swizzles, body); -- lower_ir(ctx, lower_index_loads, body); - - lower_ir(ctx, lower_broadcasts, body); - while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); -@@ -5928,22 +6281,104 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - } while (progress); - } - --static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, -- struct vsir_program *program, bool output, struct hlsl_ir_var *var) ++void 
hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) ++{ ++ bool progress; ++ ++ lower_ir(ctx, lower_matrix_swizzles, body); ++ ++ lower_ir(ctx, lower_broadcasts, body); ++ while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); ++ do ++ { ++ progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL); ++ progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL); ++ } ++ while (progress); ++ hlsl_transform_ir(ctx, split_matrix_copies, body, NULL); ++ ++ lower_ir(ctx, lower_narrowing_casts, body); ++ lower_ir(ctx, lower_int_dot, body); ++ lower_ir(ctx, lower_int_division, body); ++ lower_ir(ctx, lower_int_modulus, body); ++ lower_ir(ctx, lower_int_abs, body); ++ lower_ir(ctx, lower_casts_to_bool, body); ++ lower_ir(ctx, lower_float_modulus, body); ++ hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL); ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, body, NULL); ++ progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, body, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, body); ++ progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); ++ progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL); ++ } while (progress); ++} ++ +static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct shader_signature *signature, bool output, struct hlsl_ir_var *var) - { - enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) ++{ ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + enum vkd3d_shader_component_type component_type; + unsigned int register_index, mask, use_mask; + const char *name = var->semantic.name; - 
enum vkd3d_shader_register_type type; -- struct shader_signature *signature; - struct signature_element *element; -- unsigned int register_index, mask; - -- if ((!output && !var->last_read) || (output && !var->first_write)) -- return; ++ enum vkd3d_shader_register_type type; ++ struct signature_element *element; ++ + if (hlsl_version_ge(ctx, 4, 0)) + { + struct vkd3d_string_buffer *string; + bool has_idx, ret; - -- if (output) -- signature = &program->output_signature; -+ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, -+ ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, -+ output, signature == &program->patch_constant_signature); ++ ++ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, ++ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); + VKD3D_ASSERT(ret); + if (sysval == ~0u) + return; @@ -10215,16 +13335,15 @@ index 92b5c71c43f..88bec8610cb 100644 + if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) + { + register_index = has_idx ? var->semantic.index : ~0u; ++ mask = (1u << var->data_type->dimx) - 1; + } + else + { + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ mask = var->regs[HLSL_REGSET_NUMERIC].writemask; + } + -+ /* NOTE: remember to change this to the actually allocated mask once -+ * we start optimizing interstage signatures. */ -+ mask = (1u << var->data_type->dimx) - 1; + use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ + + switch (var->data_type->e.numeric.type) @@ -10259,8 +13378,7 @@ index 92b5c71c43f..88bec8610cb 100644 + else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position")) + name = "SV_Position"; + } - else -- signature = &program->input_signature; ++ else + { + if ((!output && !var->last_read) || (output && !var->first_write)) + return; @@ -10286,39 +13404,36 @@ index 92b5c71c43f..88bec8610cb 100644 + } + + mask = (1 << var->data_type->dimx) - 1; ++ ++ if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output ++ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) ++ { ++ if (var->data_type->dimx > 1) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, ++ "PSIZE output must have only 1 component in this shader model."); ++ /* For some reason the writemask has all components set. */ ++ mask = VKD3DSP_WRITEMASK_ALL; ++ } ++ if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 ++ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, ++ "FOG output must have only 1 component in this shader model."); ++ + use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ + component_type = VKD3D_SHADER_COMPONENT_FLOAT; + } - - if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, - signature->element_count + 1, sizeof(*signature->elements))) -@@ -5952,73 +6387,986 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - return; - } - element = &signature->elements[signature->element_count++]; ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element = &signature->elements[signature->element_count++]; + memset(element, 0, sizeof(*element)); - -- if (!hlsl_sm1_register_from_semantic(&program->shader_version, -- var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ + if (!(element->semantic_name = vkd3d_strdup(name))) - { -- unsigned int usage_idx; -- D3DDECLUSAGE usage; -- bool ret; -- -- register_index = var->regs[HLSL_REGSET_NUMERIC].id; -- -- ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -- VKD3D_ASSERT(ret); -- /* With the exception of vertex POSITION output, none of these are -- * system values. Pixel POSITION input is not equivalent to -- * SV_Position; the closer equivalent is VPOS, which is not declared -- * as a semantic. 
*/ -- if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -- && output && usage == D3DDECLUSAGE_POSITION) -- sysval = VKD3D_SHADER_SV_POSITION; -- } -- mask = (1 << var->data_type->dimx) - 1; ++ { + --signature->element_count; + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; @@ -10338,26 +13453,68 @@ index 92b5c71c43f..88bec8610cb 100644 +static void generate_vsir_signature(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_function_decl *func) +{ ++ bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; ++ bool is_patch_constant_func = func == ctx->patch_constant_func; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { -+ if (func == ctx->patch_constant_func) ++ if (var->is_input_semantic) + { -+ generate_vsir_signature_entry(ctx, program, -+ &program->patch_constant_signature, var->is_output_semantic, var); ++ if (is_patch_constant_func) ++ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, true, var); ++ else if (is_domain) ++ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, false, var); ++ else ++ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, false, var); + } -+ else ++ if (var->is_output_semantic) + { -+ if (var->is_input_semantic) -+ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); -+ if (var->is_output_semantic) -+ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); ++ if (is_patch_constant_func) ++ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, true, var); ++ else ++ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, false, var); + } + } +} + -+static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) ++static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct 
hlsl_ctx *ctx, const struct hlsl_type *type) ++{ ++ if (hlsl_version_lt(ctx, 4, 0)) ++ return VKD3D_DATA_FLOAT; ++ ++ if (type->class == HLSL_CLASS_ARRAY) ++ return vsir_data_type_from_hlsl_type(ctx, type->e.array.type); ++ if (type->class == HLSL_CLASS_STRUCT) ++ return VKD3D_DATA_MIXED; ++ if (type->class <= HLSL_CLASS_LAST_NUMERIC) ++ { ++ switch (type->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ return VKD3D_DATA_DOUBLE; ++ case HLSL_TYPE_FLOAT: ++ return VKD3D_DATA_FLOAT; ++ case HLSL_TYPE_HALF: ++ return VKD3D_DATA_HALF; ++ case HLSL_TYPE_INT: ++ return VKD3D_DATA_INT; ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ return VKD3D_DATA_UINT; ++ } ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx, ++ const struct hlsl_ir_node *instr) ++{ ++ return vsir_data_type_from_hlsl_type(ctx, instr->data_type); ++} ++ ++static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) +{ + uint32_t swizzle; + @@ -10515,6 +13672,296 @@ index 92b5c71c43f..88bec8610cb 100644 + return ins; +} + ++static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src, ++ struct hlsl_ctx *ctx, const struct hlsl_constant_value *value, ++ enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask) ++{ ++ unsigned int i, j; ++ ++ vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0); ++ if (width == 1) ++ { ++ src->reg.u.immconst_u32[0] = value->u[0].u; ++ return; ++ } ++ ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ for (i = 0, j = 0; i < 4; ++i) ++ { ++ if ((map_writemask & (1u << i)) && (j < width)) ++ src->reg.u.immconst_u32[i] = value->u[j++].u; ++ else ++ src->reg.u.immconst_u32[i] = 0; ++ } ++} ++ ++static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, ++ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask) ++{ ++ struct hlsl_ir_constant *constant; ++ ++ if (hlsl_version_ge(ctx, 4, 0) && 
instr->type == HLSL_IR_CONSTANT) ++ { ++ /* In SM4 constants are inlined */ ++ constant = hlsl_ir_constant(instr); ++ vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, ++ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); ++ } ++ else ++ { ++ vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); ++ src->reg.idx[0].offset = instr->reg.id; ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); ++ } ++} ++ ++static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) ++{ ++ const struct hlsl_ir_var *var = deref->var; ++ unsigned int offset_const_deref; ++ ++ reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP; ++ reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ ++ if (!var->indexable) ++ { ++ offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx[0].offset += offset_const_deref / 4; ++ reg->idx_count = 1; ++ } ++ else ++ { ++ offset_const_deref = deref->const_offset; ++ reg->idx[1].offset = offset_const_deref / 4; ++ reg->idx_count = 2; ++ ++ if (deref->rel_offset.node) ++ { ++ struct vkd3d_shader_src_param *idx_src; ++ ++ if (!(idx_src = vsir_program_get_src_params(program, 1))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return false; ++ } ++ memset(idx_src, 0, sizeof(*idx_src)); ++ reg->idx[1].rel_addr = idx_src; ++ ++ vsir_src_from_hlsl_node(idx_src, ctx, deref->rel_offset.node, VKD3DSP_WRITEMASK_ALL); ++ } ++ } ++ ++ *writemask = 0xf & (0xf << (offset_const_deref % 4)); ++ if (var->regs[HLSL_REGSET_NUMERIC].writemask) ++ *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); ++ 
return true; ++} ++ ++static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref); ++ const struct hlsl_ir_var *var = deref->var; ++ ++ if (var->is_uniform) ++ { ++ enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref); ++ ++ if (regset == HLSL_REGSET_TEXTURES) ++ { ++ reg->type = VKD3DSPR_RESOURCE; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else if (regset == HLSL_REGSET_UAVS) ++ { ++ reg->type = VKD3DSPR_UAV; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else if (regset == HLSL_REGSET_SAMPLERS) ++ { ++ reg->type = VKD3DSPR_SAMPLER; ++ reg->dimension = VSIR_DIMENSION_NONE; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* 
FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); ++ *writemask = VKD3DSP_WRITEMASK_ALL; ++ } ++ else ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; ++ ++ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); ++ reg->type = VKD3DSPR_CONSTBUFFER; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ if (vkd3d_shader_ver_ge(version, 5, 1)) ++ { ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ ++ reg->idx[2].offset = offset / 4; ++ reg->idx_count = 3; ++ } ++ else ++ { ++ reg->idx[0].offset = var->buffer->reg.index; ++ reg->idx[1].offset = offset / 4; ++ reg->idx_count = 2; ++ } ++ *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); ++ } ++ } ++ else if (var->is_input_semantic) ++ { ++ bool has_idx; ++ ++ if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx)) ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ ++ if (has_idx) ++ { ++ reg->idx[0].offset = var->semantic.index + offset / 4; ++ reg->idx_count = 1; ++ } ++ ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ } ++ else ++ { ++ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); ++ ++ VKD3D_ASSERT(hlsl_reg.allocated); ++ ++ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ reg->type = VKD3DSPR_PATCHCONST; ++ else ++ reg->type = VKD3DSPR_INPUT; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ reg->idx[0].offset = hlsl_reg.id; ++ reg->idx_count = 1; ++ *writemask = hlsl_reg.writemask; ++ } ++ } ++ else if (var->is_output_semantic) ++ { ++ bool has_idx; ++ ++ if 
(sm4_register_from_semantic_name(version, var->semantic.name, true, &reg->type, &has_idx)) ++ { ++ unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); ++ ++ if (has_idx) ++ { ++ reg->idx[0].offset = var->semantic.index + offset / 4; ++ reg->idx_count = 1; ++ } ++ ++ if (shader_sm4_is_scalar_register(reg)) ++ reg->dimension = VSIR_DIMENSION_SCALAR; ++ else ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); ++ } ++ else ++ { ++ struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); ++ ++ VKD3D_ASSERT(hlsl_reg.allocated); ++ reg->type = VKD3DSPR_OUTPUT; ++ reg->dimension = VSIR_DIMENSION_VEC4; ++ reg->idx[0].offset = hlsl_reg.id; ++ reg->idx_count = 1; ++ *writemask = hlsl_reg.writemask; ++ } ++ } ++ else ++ { ++ return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref); ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref, ++ unsigned int dst_writemask, const struct vkd3d_shader_location *loc) ++{ ++ uint32_t writemask; ++ ++ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) ++ return false; ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ return true; ++} ++ ++static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program, ++ struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref, ++ const struct vkd3d_shader_location *loc, unsigned int writemask) ++{ ++ uint32_t reg_writemask; ++ ++ if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, &reg_writemask, deref)) ++ return false; ++ dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask); ++ return true; ++} ++ ++static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, ++ struct hlsl_ctx
*ctx, const struct hlsl_ir_node *instr) ++{ ++ VKD3D_ASSERT(instr->reg.allocated); ++ vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); ++ dst->reg.idx[0].offset = instr->reg.id; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = instr->reg.writemask; ++} ++ +static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) +{ @@ -10532,7 +13979,7 @@ index 92b5c71c43f..88bec8610cb 100644 + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = constant->reg.id; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); ++ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); @@ -10540,10 +13987,29 @@ index 92b5c71c43f..88bec8610cb 100644 + dst_param->write_mask = instr->reg.writemask; +} + ++static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr) ++{ ++ struct vkd3d_shader_src_param *src_param; ++ struct hlsl_ir_node *instr = &expr->node; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) ++ return; ++ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; ++ ++ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); ++ ++ src_param = &ins->src[0]; ++ vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++} ++ +/* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ -+static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, -+ bool map_src_swizzles) ++static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, ++ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) +{ + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; @@ -10564,9 +14030,7 @@ index 92b5c71c43f..88bec8610cb 100644 + return; + + dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->write_mask = instr->reg.writemask; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); + dst_param->modifiers = dst_mod; + + for (i = 0; i < src_count; ++i) @@ -10574,9 +14038,7 @@ index 92b5c71c43f..88bec8610cb 100644 + struct hlsl_ir_node *operand = expr->operands[i].node; + + src_param = &ins->src[i]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = operand->reg.id; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, ++ vsir_src_from_hlsl_node(src_param, ctx, operand, + map_src_swizzles ? 
dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = src_mod; + } @@ -10598,7 +14060,7 @@ index 92b5c71c43f..88bec8610cb 100644 + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(operand); + -+ src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); ++ src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); + for (i = 0; i < 4; ++i) + { + if (instr->reg.writemask & (1u << i)) @@ -10644,7 +14106,7 @@ index 92b5c71c43f..88bec8610cb 100644 + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + if (ctx->profile->major_version < 3) + { @@ -10686,13 +14148,13 @@ index 92b5c71c43f..88bec8610cb 100644 + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_DOUBLE: + if (ctx->double_as_float_alias) + { -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + } + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -10717,12 +14179,9 @@ index 92b5c71c43f..88bec8610cb 100644 + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ + case HLSL_TYPE_BOOL: -+ hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer."); -+ break; 
++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ return true; + + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer."); @@ -10739,7 +14198,7 @@ index 92b5c71c43f..88bec8610cb 100644 + case HLSL_TYPE_FLOAT: + if (ctx->double_as_float_alias) + { -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + } + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -10779,7 +14238,7 @@ index 92b5c71c43f..88bec8610cb 100644 + switch (expr->op) + { + case HLSL_OP1_ABS: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); + break; + + case HLSL_OP1_CAST: @@ -10791,11 +14250,11 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_OP1_DSX: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); + break; + + case HLSL_OP1_DSY: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); + break; + + case HLSL_OP1_EXP2: @@ -10807,7 +14266,7 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_OP1_NEG: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); + break; + + case HLSL_OP1_RCP: @@ -10815,7 +14274,7 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_OP1_REINTERPRET: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + break; + + case HLSL_OP1_RSQ: @@ -10823,7 +14282,7 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_OP1_SAT: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); + break; + + case HLSL_OP1_SIN_REDUCED: @@ -10832,18 +14291,18 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_OP2_ADD: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + break; + + case HLSL_OP2_DOT: + switch (expr->operands[0].node->data_type->dimx) + { + case 3: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + break; + + case 4: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + break; + + default: @@ -10853,43 +14312,43 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_OP2_MAX: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_MIN: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_MUL: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); + break; 
+ + case HLSL_OP1_FRACT: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_AND: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_OR: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_SLT: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); + break; + + case HLSL_OP3_CMP: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); + break; + + case HLSL_OP3_DP2ADD: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); + break; + + case HLSL_OP3_MAD: -+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); + break; + + default: @@ -10915,6 +14374,8 @@ index 92b5c71c43f..88bec8610cb 100644 + + if (deref->var->is_output_semantic) + { ++ const char *semantic_name = deref->var->semantic.name; ++ + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; @@ -10924,7 +14385,7 @@ index 92b5c71c43f..88bec8610cb 100644 + type = VKD3DSPR_TEMP; + register_index = 0; + } -+ else if 
(!sm1_register_from_semantic_name(&version, deref->var->semantic.name, ++ else if (!sm1_register_from_semantic_name(&version, semantic_name, + deref->var->semantic.index, true, &type, &register_index)) + { + VKD3D_ASSERT(reg.allocated); @@ -10933,6 +14394,14 @@ index 92b5c71c43f..88bec8610cb 100644 + } + else + writemask = (1u << deref->var->data_type->dimx) - 1; ++ ++ if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") ++ || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) ++ { ++ /* These are always 1-component, but for some reason are written ++ * with a writemask containing all components. */ ++ writemask = VKD3DSP_WRITEMASK_ALL; ++ } + } + else + VKD3D_ASSERT(reg.allocated); @@ -10954,13 +14423,9 @@ index 92b5c71c43f..88bec8610cb 100644 + uint32_t register_index; + unsigned int writemask; + struct hlsl_reg reg; - -- memset(element, 0, sizeof(*element)); -- if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) ++ + if (hlsl_type_is_resource(deref->var->data_type)) - { -- --signature->element_count; -- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ { + unsigned int sampler_offset; + + type = VKD3DSPR_COMBINED_SAMPLER; @@ -11009,7 +14474,7 @@ index 92b5c71c43f..88bec8610cb 100644 + + vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = register_index; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); @@ -11025,7 +14490,7 @@ index 92b5c71c43f..88bec8610cb 100644 + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) - return; ++ return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); @@ -11088,9
+14553,7 @@ index 92b5c71c43f..88bec8610cb 100644 + dst_param->write_mask = instr->reg.writemask; + + src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = coords->reg.id; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); + + sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, + VKD3DSP_WRITEMASK_ALL, &ins->location); @@ -11098,47 +14561,31 @@ index 92b5c71c43f..88bec8610cb 100644 + if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + src_param = &ins->src[2]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = ddx->reg.id; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL); + + src_param = &ins->src[3]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = ddy->reg.id; -+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); - } -- element->semantic_index = var->semantic.index; -- element->sysval_semantic = sysval; -- element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -- element->register_index = register_index; -- element->target_location = register_index; -- element->register_count = 1; -- element->mask = mask; -- element->used_mask = mask; -- if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -- element->interpolation_mode = VKD3DSIM_LINEAR; - } - --static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) -+static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, -+ struct hlsl_ir_swizzle *swizzle_instr) - { -- struct hlsl_ir_var *var; ++ 
vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL); ++ } ++} ++ ++static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) ++{ + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_src_param *src_param; + struct vkd3d_shader_instruction *ins; + uint32_t swizzle; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) + return; + + dst_param = &ins->dst[0]; -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + dst_param->reg.idx[0].offset = instr->reg.id; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + dst_param->write_mask = instr->reg.writemask; + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); @@ -11147,8 +14594,10 @@ index 92b5c71c43f..88bec8610cb 100644 + swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); ++ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); + src_param->reg.idx[0].offset = val->reg.id; ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = swizzle; +} + @@ -11166,9 +14615,7 @@ index 92b5c71c43f..88bec8610cb 100644 + sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); + + src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = rhs->reg.id; -+ src_param->swizzle = 
sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask); ++ vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask); +} + +static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, @@ -11180,11 +14627,7 @@ index 92b5c71c43f..88bec8610cb 100644 + struct vkd3d_shader_instruction *ins; + + if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) - { -- if (var->is_input_semantic) -- sm1_generate_vsir_signature_entry(ctx, program, false, var); -- if (var->is_output_semantic) -- sm1_generate_vsir_signature_entry(ctx, program, true, var); ++ { + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) + return; + @@ -11207,7 +14650,6 @@ index 92b5c71c43f..88bec8610cb 100644 + struct vkd3d_shader_src_param *src_param; + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; -+ uint32_t swizzle; + + if (hlsl_version_lt(ctx, 2, 1)) + { @@ -11220,19 +14662,12 @@ index 92b5c71c43f..88bec8610cb 100644 + return; + ins->flags = VKD3D_SHADER_REL_OP_NE; + -+ swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask); -+ swizzle = vsir_swizzle_from_hlsl(swizzle); -+ + src_param = &ins->src[0]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = condition->reg.id; -+ src_param->swizzle = swizzle; ++ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = 0; + + src_param = &ins->src[1]; -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src_param->reg.idx[0].offset = condition->reg.id; -+ src_param->swizzle = swizzle; ++ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = VKD3DSPSM_NEG; + + sm1_generate_vsir_block(ctx, &iff->then_block, program); @@ -11295,39 +14730,42 @@ index 92b5c71c43f..88bec8610cb 100644 + break; + + case HLSL_IR_SWIZZLE: -+ sm1_generate_vsir_instr_swizzle(ctx, program, 
hlsl_ir_swizzle(instr)); ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + + default: + hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); + break; + } - } - } - --/* OBJECTIVE: Translate all the information from ctx and entry_func to the -- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -- * without relying on ctx and entry_func. */ - static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) - { - struct vkd3d_shader_version version = {0}; - struct vkd3d_bytecode_buffer buffer = {0}; ++ } ++} ++ ++static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) ++{ ++ struct vkd3d_shader_version version = {0}; ++ struct vkd3d_bytecode_buffer buffer = {0}; + struct hlsl_block block; - - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (!vsir_program_init(program, NULL, &version, 0)) -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -@@ -6034,7 +7382,37 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - ctab->code = buffer.data; - ctab->size = buffer.size; - -- sm1_generate_vsir_signature(ctx, program); ++ ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ write_sm1_uniforms(ctx, &buffer); ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ 
return; ++ } ++ ctab->code = buffer.data; ++ ctab->size = buffer.size; ++ + generate_vsir_signature(ctx, program, entry_func); + + hlsl_block_init(&block); @@ -11338,36 +14776,1282 @@ index 92b5c71c43f..88bec8610cb 100644 + sm1_generate_vsir_block(ctx, &entry_func->body, program); +} + -+/* OBJECTIVE: Translate all the information from ctx and entry_func to the -+ * vsir_program, so it can be used as input to tpf_compile() without relying -+ * on ctx and entry_func. */ -+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ uint64_t config_flags, struct vsir_program *program) ++static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) +{ -+ struct vkd3d_shader_version version = {0}; ++ struct vkd3d_shader_location *loc; ++ struct hlsl_ir_node *vsir_instr; + -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; ++ loc = &program->instructions.elements[program->instructions.count - 1].location; + -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) ++ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ hlsl_block_add_instr(block, vsir_instr); ++} ++ ++static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_node *instr) ++{ ++ struct vkd3d_shader_location *loc; ++ struct hlsl_ir_node *vsir_instr; ++ ++ loc = &program->instructions.elements[program->instructions.count - 1].location; ++ ++ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, ++ program->instructions.count - 1, instr->data_type, &instr->reg, loc))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + -+ generate_vsir_signature(ctx, program, entry_func); ++ list_add_before(&instr->entry, 
&vsir_instr->entry); ++ hlsl_replace_node(instr, vsir_instr); ++} ++ ++static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, ++ const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, ++ const struct vkd3d_shader_location *loc) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const bool output = var->is_output_semantic; ++ enum vkd3d_shader_sysval_semantic semantic; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_instruction *ins; ++ enum vkd3d_shader_register_type type; ++ enum vkd3d_shader_opcode opcode; ++ unsigned int idx = 0; ++ uint32_t write_mask; ++ bool has_idx; ++ ++ sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ++ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); ++ if (semantic == ~0u) ++ semantic = VKD3D_SHADER_SV_NONE; ++ ++ if (var->is_input_semantic) ++ { ++ switch (semantic) ++ { ++ case VKD3D_SHADER_SV_NONE: ++ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ++ ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; ++ break; ++ ++ case VKD3D_SHADER_SV_INSTANCE_ID: ++ case VKD3D_SHADER_SV_IS_FRONT_FACE: ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: ++ case VKD3D_SHADER_SV_SAMPLE_INDEX: ++ case VKD3D_SHADER_SV_VERTEX_ID: ++ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ++ ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV; ++ break; ++ ++ default: ++ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) ++ ? 
VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV; ++ break; ++ } ++ } ++ else ++ { ++ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) ++ opcode = VKD3DSIH_DCL_OUTPUT; ++ else ++ opcode = VKD3DSIH_DCL_OUTPUT_SIV; ++ } ++ ++ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx)) ++ { ++ if (has_idx) ++ idx = var->semantic.index; ++ write_mask = (1u << var->data_type->dimx) - 1; ++ } ++ else ++ { ++ if (output) ++ type = VKD3DSPR_OUTPUT; ++ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ type = VKD3DSPR_PATCHCONST; ++ else ++ type = VKD3DSPR_INPUT; ++ ++ has_idx = true; ++ idx = var->regs[HLSL_REGSET_NUMERIC].id; ++ write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0))) ++ return; ++ ++ if (opcode == VKD3DSIH_DCL_OUTPUT) ++ { ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE ++ || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT); ++ dst_param = &ins->declaration.dst; ++ } ++ else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) ++ { ++ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); ++ dst_param = &ins->declaration.dst; ++ } ++ else ++ { ++ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); ++ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic, ++ var->semantic.index); ++ dst_param = &ins->declaration.register_semantic.reg; ++ } ++ ++ if (has_idx) ++ { ++ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); ++ dst_param->reg.idx[0].offset = idx; ++ } ++ else ++ { ++ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); ++ } ++ ++ if (shader_sm4_is_scalar_register(&dst_param->reg)) ++ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; ++ else ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ dst_param->write_mask = write_mask; ++ ++ if (var->is_input_semantic && version->type == 
VKD3D_SHADER_TYPE_PIXEL) ++ ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); ++ ++ add_last_vsir_instr_to_block(ctx, program, block); ++} ++ ++static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, ++ uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0))) ++ return; ++ ++ ins->declaration.count = temp_count; ++ ++ add_last_vsir_instr_to_block(ctx, program, block); ++} ++ ++static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_block *block, uint32_t idx, ++ uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc) ++{ ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0))) ++ return; ++ ++ ins->declaration.indexable_temp.register_idx = idx; ++ ins->declaration.indexable_temp.register_size = size; ++ ins->declaration.indexable_temp.alignment = 0; ++ ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; ++ ins->declaration.indexable_temp.component_count = comp_count; ++ ins->declaration.indexable_temp.has_function_scope = false; ++ ++ add_last_vsir_instr_to_block(ctx, program, block); ++} ++ ++static bool type_is_float(const struct hlsl_type *type) ++{ ++ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; ++} ++ ++static bool type_is_integer(const struct hlsl_type *type) ++{ ++ return type->e.numeric.type == HLSL_TYPE_BOOL ++ || type->e.numeric.type == HLSL_TYPE_INT ++ || type->e.numeric.type == HLSL_TYPE_UINT; ++} ++ ++static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program, ++ const struct hlsl_ir_expr *expr, uint32_t bits) ++{ ++ struct 
hlsl_ir_node *operand = expr->operands[0].node; ++ const struct hlsl_ir_node *instr = &expr->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct hlsl_constant_value value = {0}; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(instr->reg.allocated); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2))) ++ return; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask); ++ ++ value.u[0].u = bits; ++ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0); ++} ++ ++static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr) ++{ ++ const struct hlsl_ir_node *arg1 = expr->operands[0].node; ++ const struct hlsl_type *dst_type = expr->node.data_type; ++ const struct hlsl_type *src_type = arg1->data_type; ++ ++ static const union ++ { ++ uint32_t u; ++ float f; ++ } one = { .f = 1.0 }; ++ ++ /* Narrowing casts were already lowered. 
*/ ++ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); ++ ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u); ++ return true; ++ ++ case HLSL_TYPE_DOUBLE: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); ++ return false; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ break; ++ ++ case HLSL_TYPE_INT: ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u); ++ return true; ++ ++ case HLSL_TYPE_DOUBLE: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); ++ return false; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ break; ++ ++ case HLSL_TYPE_UINT: ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ sm4_generate_vsir_cast_from_bool(ctx, 
program, expr, 1u); ++ return true; ++ ++ case HLSL_TYPE_DOUBLE: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); ++ return false; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ break; ++ ++ case HLSL_TYPE_DOUBLE: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); ++ return false; ++ ++ case HLSL_TYPE_BOOL: ++ /* Casts to bool should have already been lowered. */ ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, ++ enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx) ++{ ++ struct vkd3d_shader_dst_param *dst_param, *null_param; ++ const struct hlsl_ir_node *instr = &expr->node; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int i, src_count; ++ ++ VKD3D_ASSERT(instr->reg.allocated); ++ ++ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) ++ { ++ if (expr->operands[i].node) ++ src_count = i + 1; ++ } ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count))) ++ return; ++ ++ dst_param = &ins->dst[dst_idx]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ null_param = &ins->dst[1 - dst_idx]; ++ vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0); ++ null_param->reg.dimension = VSIR_DIMENSION_NONE; ++ ++ for (i = 0; i < src_count; ++i) ++ vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask); ++} ++ ++static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, ++ struct vsir_program *program, const struct hlsl_ir_expr *expr) ++{ ++ struct hlsl_ir_node *operand = expr->operands[0].node; ++ const struct hlsl_ir_node *instr = &expr->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct hlsl_constant_value value = {0}; ++ struct vkd3d_shader_instruction *ins; ++ ++ VKD3D_ASSERT(type_is_float(expr->node.data_type)); ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, 
&instr->loc, VKD3DSIH_DIV, 1, 2))) + return; + +- if (output) +- signature = &program->output_signature; +- else +- signature = &program->input_signature; ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ value.u[0].f = 1.0f; ++ value.u[1].f = 1.0f; ++ value.u[2].f = 1.0f; ++ value.u[3].f = 1.0f; ++ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, ++ VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); ++ ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); ++} ++ ++static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name) ++{ ++ const struct hlsl_type *dst_type = expr->node.data_type; ++ const struct hlsl_type *src_type = NULL; ++ ++ VKD3D_ASSERT(expr->node.reg.allocated); ++ if (expr->operands[0].node) ++ src_type = expr->operands[0].node->data_type; ++ ++ switch (expr->op) ++ { ++ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: ++ sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr); ++ return true; ++ ++ case HLSL_OP1_ABS: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); ++ return true; ++ ++ case HLSL_OP1_BIT_NOT: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_CAST: ++ return sm4_generate_vsir_instr_expr_cast(ctx, program, expr); ++ ++ case HLSL_OP1_CEIL: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_COS: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1); ++ return true; ++ ++ case HLSL_OP1_DSX: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_DSX_COARSE: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_DSX_FINE: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_DSY: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_DSY_COARSE: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_DSY_FINE: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_EXP2: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_F16TOF32: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_F32TOF16: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_FLOOR: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_FRACT: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, 
VKD3DSIH_FRC, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_LOG2: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_LOGIC_NOT: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_NEG: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP1_RCP: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ /* SM5 comes with a RCP opcode */ ++ if (hlsl_version_ge(ctx, 5, 0)) ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true); ++ else ++ sm4_generate_vsir_rcp_using_div(ctx, program, expr); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP1_REINTERPRET: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_ROUND: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_RSQ: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_SAT: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 
VKD3DSPDM_SATURATE, true); ++ return true; ++ ++ case HLSL_OP1_SIN: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0); ++ return true; ++ ++ case HLSL_OP1_SQRT: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true); ++ return true; ++ ++ case HLSL_OP1_TRUNC: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_ADD: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_BIT_AND: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_BIT_OR: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_BIT_XOR: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_DIV: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", 
dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_DOT: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ switch (expr->operands[0].node->data_type->dimx) ++ { ++ case 4: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); ++ return true; ++ ++ case 3: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); ++ return true; ++ ++ case 2: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false); ++ return true; ++ ++ case 1: ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_EQUAL: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_GEQUAL: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", ++ 
debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_LESS: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_LOGIC_AND: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_LOGIC_OR: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP2_LSHIFT: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true); ++ return true; ++ ++ case HLSL_OP3_MAD: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MAX: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MIN: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MOD: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_UINT: ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_MUL: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ /* Using IMUL instead of UMUL because we're taking the low ++ * bits, and the native compiler generates IMUL. 
*/ ++ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name); ++ return false; ++ } ++ ++ case HLSL_OP2_NEQUAL: ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ ++ switch (src_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true); ++ return true; ++ ++ case HLSL_TYPE_BOOL: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", ++ debug_hlsl_type(ctx, src_type)); ++ return false; ++ } ++ ++ case HLSL_OP2_RSHIFT: ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, ++ dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true); ++ return true; ++ ++ case HLSL_OP3_TERNARY: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true); ++ return true; ++ ++ default: ++ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); ++ return false; ++ } ++} ++ ++static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_store *store) ++{ ++ struct hlsl_ir_node *instr = &store->node; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct vkd3d_shader_src_param *src_param; ++ struct vkd3d_shader_instruction *ins; ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return false; ++ ++ dst_param = &ins->dst[0]; ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ dst_param, &store->lhs, &instr->loc, store->writemask)) ++ return false; ++ ++ src_param = &ins->src[0]; ++ vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask); ++ ++ return true; ++} ++ ++/* Does this variable's data come directly from the API user, rather than ++ * being temporary or from a previous shader stage? I.e. is it a uniform or ++ * VS input? 
*/ ++static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) ++{ ++ if (var->is_uniform) ++ return true; ++ ++ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; ++} ++ ++static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) ++{ ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ const struct hlsl_type *type = load->node.data_type; ++ struct vkd3d_shader_dst_param *dst_param; ++ struct hlsl_ir_node *instr = &load->node; ++ struct vkd3d_shader_instruction *ins; ++ struct hlsl_constant_value value; ++ ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); ++ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) ++ { ++ /* Uniform bools can be specified as anything, but internal bools ++ * always have 0 for false and ~0 for true. Normalise that here. */ ++ ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3))) ++ return false; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) ++ return false; ++ ++ memset(&value, 0xff, sizeof(value)); ++ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, ++ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ memset(&value, 0x00, sizeof(value)); ++ vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value, ++ VKD3D_DATA_UINT, type->dimx, dst_param->write_mask); ++ } ++ else ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) ++ return false; ++ ++ dst_param = &ins->dst[0]; ++ vsir_dst_from_hlsl_node(dst_param, ctx, instr); ++ ++ if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, ++ &ins->src[0], &load->src, dst_param->write_mask, &instr->loc)) ++ return 
false; ++ } ++ return true; ++} ++ ++static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_resource_store *store) ++{ ++ struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource); ++ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; ++ struct hlsl_ir_node *instr = &store->node; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int writemask; + +- if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, +- signature->element_count + 1, sizeof(*signature->elements))) ++ if (!store->resource.var->is_uniform) + { +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; ++ hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable."); ++ return false; + } +- element = &signature->elements[signature->element_count++]; + +- if (!hlsl_sm1_register_from_semantic(&program->shader_version, +- var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) + { +- unsigned int usage_idx; +- D3DDECLUSAGE usage; +- bool ret; ++ hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); ++ return false; ++ } + +- register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2))) ++ return false; + +- ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); +- VKD3D_ASSERT(ret); +- /* With the exception of vertex POSITION output, none of these are +- * system values. Pixel POSITION input is not equivalent to +- * SV_Position; the closer equivalent is VPOS, which is not declared +- * as a semantic. 
*/ +- if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX +- && output && usage == D3DDECLUSAGE_POSITION) +- sysval = VKD3D_SHADER_SV_POSITION; ++ writemask = vkd3d_write_mask_from_component_count(value->data_type->dimx); ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ &ins->dst[0], &store->resource, &instr->loc, writemask)) ++ return false; ++ } ++ else ++ { ++ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2))) ++ return false; ++ ++ if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, ++ &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL)) ++ return false; + } +- mask = (1 << var->data_type->dimx) - 1; + +- memset(element, 0, sizeof(*element)); +- if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) ++ vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL); ++ ++ return true; ++} ++ ++static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++{ ++ struct vkd3d_string_buffer *dst_type_string; ++ struct hlsl_ir_node *instr, *next; ++ struct hlsl_ir_switch_case *c; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { +- --signature->element_count; +- ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; +- return; ++ if (instr->data_type) ++ { ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); ++ break; ++ } ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ /* In SM4 all constants are inlined. 
*/ ++ break; ++ ++ case HLSL_IR_EXPR: ++ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) ++ break; ++ ++ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ ++ hlsl_release_string_buffer(ctx, dst_type_string); ++ break; ++ ++ case HLSL_IR_IF: ++ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); ++ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); ++ break; ++ ++ case HLSL_IR_LOAD: ++ if (sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr))) ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ case HLSL_IR_LOOP: ++ sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); ++ break; ++ ++ case HLSL_IR_RESOURCE_STORE: ++ if (sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr))) ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ case HLSL_IR_STORE: ++ if (sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr))) ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ case HLSL_IR_SWITCH: ++ LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) ++ sm4_generate_vsir_block(ctx, &c->body, program); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ default: ++ break; ++ } + } +- element->semantic_index = var->semantic.index; +- element->sysval_semantic = sysval; +- element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; +- element->register_index = register_index; +- element->target_location = register_index; +- element->register_count = 1; +- element->mask = mask; +- element->used_mask = mask; +- if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) +- element->interpolation_mode = VKD3DSIM_LINEAR; 
+ } + +-static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) ++static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, ++ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) + { ++ bool is_patch_constant_func = func == ctx->patch_constant_func; ++ struct hlsl_block block = {0}; ++ struct hlsl_scope *scope; + struct hlsl_ir_var *var; ++ uint32_t temp_count; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ compute_liveness(ctx, func); ++ mark_indexable_vars(ctx, func); ++ temp_count = allocate_temp_registers(ctx, func); ++ if (ctx->result) ++ return; ++ program->temp_count = max(program->temp_count, temp_count); ++ ++ hlsl_block_init(&block); ++ ++ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) + { +- if (var->is_input_semantic) +- sm1_generate_vsir_signature_entry(ctx, program, false, var); +- if (var->is_output_semantic) +- sm1_generate_vsir_signature_entry(ctx, program, true, var); ++ if ((var->is_input_semantic && var->last_read) ++ || (var->is_output_semantic && var->first_write)) ++ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); ++ } ++ ++ if (temp_count) ++ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); ++ ++ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) ++ { ++ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) ++ continue; ++ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) ++ continue; ++ ++ if (var->indexable) ++ { ++ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; ++ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; ++ ++ sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); ++ } ++ } + } ++ ++ list_move_head(&func->body.instrs, 
&block.instrs); ++ ++ hlsl_block_cleanup(&block); ++ ++ sm4_generate_vsir_block(ctx, &func->body, program); + } + + /* OBJECTIVE: Translate all the information from ctx and entry_func to the +- * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() +- * without relying on ctx and entry_func. */ +-static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +- uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) ++ * vsir_program, so it can be used as input to tpf_compile() without relying ++ * on ctx and entry_func. */ ++static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, ++ uint64_t config_flags, struct vsir_program *program) + { + struct vkd3d_shader_version version = {0}; +- struct vkd3d_bytecode_buffer buffer = {0}; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; +- if (!vsir_program_init(program, NULL, &version, 0)) ++ ++ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + +- write_sm1_uniforms(ctx, &buffer); +- if (buffer.status) ++ generate_vsir_signature(ctx, program, func); + if (version.type == VKD3D_SHADER_TYPE_HULL) + generate_vsir_signature(ctx, program, ctx->patch_constant_func); ++ ++ if (version.type == VKD3D_SHADER_TYPE_COMPUTE) + { +- vkd3d_free(buffer.data); +- ctx->result = buffer.status; +- return; ++ program->thread_group_size.x = ctx->thread_count[0]; ++ program->thread_group_size.y = ctx->thread_count[1]; ++ program->thread_group_size.z = ctx->thread_count[2]; + } +- ctab->code = buffer.data; +- ctab->size = buffer.size; + +- sm1_generate_vsir_signature(ctx, program); ++ sm4_generate_vsir_add_function(ctx, func, config_flags, program); ++ if (version.type == VKD3D_SHADER_TYPE_HULL) ++ sm4_generate_vsir_add_function(ctx, 
ctx->patch_constant_func, config_flags, program); } static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -@@ -6337,16 +7715,23 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru +@@ -6337,16 +9331,95 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return true; } -int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) ++static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *call, *rhs, *store; ++ struct hlsl_ir_function_decl *func; ++ unsigned int component_count; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_expr *expr; ++ struct hlsl_ir_var *lhs; ++ char *body; ++ ++ static const char template[] = ++ "typedef uint%u uintX;\n" ++ "uintX soft_f32tof16(float%u x)\n" ++ "{\n" ++ " uintX v = asuint(x);\n" ++ " uintX v_abs = v & 0x7fffffff;\n" ++ " uintX sign_bit = (v >> 16) & 0x8000;\n" ++ " uintX exp = (v >> 23) & 0xff;\n" ++ " uintX mantissa = v & 0x7fffff;\n" ++ " uintX nan16;\n" ++ " uintX nan = (v & 0x7f800000) == 0x7f800000;\n" ++ " uintX val;\n" ++ "\n" ++ " val = 113 - exp;\n" ++ " val = (mantissa + 0x800000) >> val;\n" ++ " val >>= 13;\n" ++ "\n" ++ " val = (exp - 127) < -38 ? 0 : val;\n" ++ "\n" ++ " val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n" ++ " val = v_abs > 0x47ffe000 ? 0x7bff : val;\n" ++ "\n" ++ " nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n" ++ " val = nan ? 
nan16 : val;\n" ++ "\n" ++ " return (val & 0x7fff) + sign_bit;\n" ++ "}\n"; ++ ++ if (node->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(node); ++ ++ if (expr->op != HLSL_OP1_F32TOF16) ++ return false; ++ ++ rhs = expr->operands[0].node; ++ component_count = hlsl_type_component_count(rhs->data_type); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) ++ return false; ++ ++ if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body))) ++ return false; ++ ++ lhs = func->parameters.vars[0]; ++ ++ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) ++ return false; ++ hlsl_block_add_instr(block, store); ++ ++ if (!(call = hlsl_new_call(ctx, func, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, call); ++ ++ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, &load->node); ++ ++ return true; ++} ++ +static void process_entry_function(struct hlsl_ctx *ctx, + const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) { @@ -11389,7 +16073,7 @@ index 92b5c71c43f..88bec8610cb 100644 memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx)); hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx); -@@ -6355,7 +7740,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -6355,10 +9428,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry /* Avoid going into an infinite loop when processing call instructions. * lower_return() recurses into inferior calls. 
*/ if (ctx->result) @@ -11397,8 +16081,14 @@ index 92b5c71c43f..88bec8610cb 100644 + return; if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) ++ { lower_ir(ctx, lower_f16tof32, body); -@@ -6367,20 +7752,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry ++ lower_ir(ctx, lower_f32tof16, body); ++ } + + lower_return(ctx, entry_func, body, false); + +@@ -6367,20 +9443,22 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_matrix_swizzles, body); lower_ir(ctx, lower_index_loads, body); @@ -11428,7 +16118,7 @@ index 92b5c71c43f..88bec8610cb 100644 else { if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT -@@ -6392,9 +7779,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -6392,9 +9470,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry } if (var->storage_modifiers & HLSL_STORAGE_IN) @@ -11440,7 +16130,7 @@ index 92b5c71c43f..88bec8610cb 100644 } } if (entry_func->return_var) -@@ -6403,24 +7790,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -6403,28 +9481,17 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC, "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name); @@ -11466,7 +16156,15 @@ index 92b5c71c43f..88bec8610cb 100644 if (profile->major_version >= 4) { hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); -@@ -6496,29 +7868,69 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + } ++ else ++ { ++ hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); ++ } + + transform_unroll_loops(ctx, body); + hlsl_run_const_passes(ctx, body); +@@ -6496,29 +9563,72 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry while 
(hlsl_transform_ir(ctx, dce, body, NULL)); compute_liveness(ctx, entry_func); @@ -11493,13 +16191,15 @@ index 92b5c71c43f..88bec8610cb 100644 + parse_entry_function_attributes(ctx, entry_func); + if (ctx->result) + return ctx->result; - -- allocate_register_reservations(ctx); ++ + if (profile->type == VKD3D_SHADER_TYPE_HULL) + validate_hull_shader_attributes(ctx, entry_func); + else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) + hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, + "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); ++ else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, ++ "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); + + hlsl_block_init(&global_uniform_block); + @@ -11512,7 +16212,8 @@ index 92b5c71c43f..88bec8610cb 100644 + process_entry_function(ctx, &global_uniform_block, entry_func); + if (ctx->result) + return ctx->result; -+ + +- allocate_register_reservations(ctx); + if (profile->type == VKD3D_SHADER_TYPE_HULL) + { + process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func); @@ -11546,7 +16247,7 @@ index 92b5c71c43f..88bec8610cb 100644 if (ctx->result) return ctx->result; -@@ -6540,14 +7952,29 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry +@@ -6540,14 +9650,29 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry return ctx->result; } @@ -11621,7 +16322,7 @@ index db4913b7c62..716adb15f08 100644 break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 747238e2fee..b47f12d2188 100644 +index 747238e2fee..56c98d30661 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -19,6 +19,15 @@ @@ -11646,24 +16347,55 @@ index 
747238e2fee..b47f12d2188 100644 bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve) + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -+ bool normalised_io) ++ enum vsir_normalisation_level normalisation_level) { memset(program, 0, sizeof(*program)); -@@ -87,6 +97,9 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c +@@ -87,6 +97,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c } program->shader_version = *version; + program->cf_type = cf_type; -+ program->normalised_io = normalised_io; -+ program->normalised_hull_cp_io = normalised_io; ++ program->normalisation_level = normalisation_level; return shader_instruction_array_init(&program->instructions, reserve); } -@@ -117,26 +130,156 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( +@@ -117,26 +129,204 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( return NULL; } ++static struct signature_element *vsir_signature_find_element_by_name( ++ const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index) ++{ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name) ++ && signature->elements[i].semantic_index == semantic_index) ++ return &signature->elements[i]; ++ } ++ ++ return NULL; ++} ++ ++bool vsir_signature_find_sysval(const struct shader_signature *signature, ++ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index) ++{ ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ if (e->sysval_semantic == sysval && e->semantic_index == semantic_index) ++ { ++ *element_index = i; 
++ return true; ++ } ++ } ++ ++ return false; ++} ++ +void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) +{ @@ -11806,12 +16538,28 @@ index 747238e2fee..b47f12d2188 100644 + dst->reg.idx[0].offset = idx; +} + ++static void dst_param_init_temp_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ dst->reg.idx[0].offset = idx; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++} ++ +static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + dst->reg.idx[0].offset = idx; +} + ++static void dst_param_init_output(struct vkd3d_shader_dst_param *dst, ++ enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1); ++ dst->reg.idx[0].offset = idx; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = write_mask; ++} ++ +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode) +{ @@ -11825,7 +16573,7 @@ index 747238e2fee..b47f12d2188 100644 struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) { -@@ -161,6 +304,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, +@@ -161,6 +351,37 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } @@ -11863,7 +16611,7 @@ index 747238e2fee..b47f12d2188 100644 static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_data_type data_type, enum vkd3d_shader_opcode *opcode, bool *requires_swap) { -@@ -441,10 +615,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog 
+@@ -441,10 +662,58 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog return VKD3D_OK; } @@ -11923,16 +16671,20 @@ index 747238e2fee..b47f12d2188 100644 unsigned int tmp_idx = ~0u, i; enum vkd3d_result ret; -@@ -473,6 +695,8 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr +@@ -471,8 +740,12 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + + case VKD3DSIH_DCL: case VKD3DSIH_DCL_CONSTANT_BUFFER: ++ case VKD3DSIH_DCL_GLOBAL_FLAGS: case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: ++ case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: + case VKD3DSIH_DCL_THREAD_GROUP: + case VKD3DSIH_DCL_UAV_TYPED: vkd3d_shader_instruction_make_nop(ins); break; -@@ -481,6 +705,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr +@@ -481,6 +754,38 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return ret; break; @@ -11971,7 +16723,7 @@ index 747238e2fee..b47f12d2188 100644 default: break; } -@@ -523,7 +779,7 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i +@@ -523,29 +828,197 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i } for (i = 0; i < ins->dst_count; ++i) @@ -11979,22 +16731,355 @@ index 747238e2fee..b47f12d2188 100644 + shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); } - static const struct vkd3d_shader_varying_map *find_varying_map( -@@ -541,9 +797,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( +-static const struct vkd3d_shader_varying_map *find_varying_map( +- const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) ++/* Ensure that the program closes with a ret. sm1 programs do not, by default. ++ * Many of our IR passes rely on this in order to insert instructions at the ++ * end of execution. 
*/ ++static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) + { +- unsigned int i; +- +- for (i = 0; i < varying_map->varying_count; ++i) +- { +- if (varying_map->varying_map[i].output_signature_index == signature_idx) +- return &varying_map->varying_map[i]; +- } ++ static const struct vkd3d_shader_location no_loc; ++ if (program->instructions.count ++ && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET) ++ return VKD3D_OK; + +- return NULL; ++ if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET); ++ return VKD3D_OK; } - static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, +-static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) ++static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct shader_signature *signature = &program->output_signature; ++ struct signature_element *new_elements, *e; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) ++ return VKD3D_OK; ++ ++ if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0))) ++ { ++ program->diffuse_written_mask = e->mask; ++ e->mask = VKD3DSP_WRITEMASK_ALL; ++ ++ return VKD3D_OK; ++ } ++ ++ if (!(new_elements = vkd3d_realloc(signature->elements, ++ (signature->element_count + 1) * sizeof(*signature->elements)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ signature->elements = new_elements; ++ e = &signature->elements[signature->element_count++]; ++ memset(e, 0, sizeof(*e)); ++ e->semantic_name = vkd3d_strdup("COLOR"); ++ 
e->sysval_semantic = VKD3D_SHADER_SV_NONE; ++ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ e->register_count = 1; ++ e->mask = VKD3DSP_WRITEMASK_ALL; ++ e->used_mask = VKD3DSP_WRITEMASK_ALL; ++ e->register_index = SM1_COLOR_REGISTER_OFFSET; ++ e->target_location = SM1_COLOR_REGISTER_OFFSET; ++ e->interpolation_mode = VKD3DSIM_NONE; ++ ++ return VKD3D_OK; ++} ++ ++/* Uninitialized components of diffuse yield 1.0 in SM1-2. Implement this by ++ * always writing diffuse in those versions, even if the PS doesn't read it. */ ++static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ static const struct vkd3d_shader_location no_loc; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int i; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX ++ || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL) ++ return VKD3D_OK; ++ ++ /* Write the instruction after all LABEL, DCL, and NOP instructions. ++ * We need to skip NOP instructions because they might result from removed ++ * DCLs, and there could still be DCLs after NOPs. 
*/ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) ++ break; ++ } ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[i]; ++ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = 0; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask; ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ for (i = 0; i < 4; ++i) ++ ins->src[0].reg.u.immconst_f32[i] = 1.0f; ++ return VKD3D_OK; ++} ++ ++static const struct vkd3d_shader_varying_map *find_varying_map( ++ const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < varying_map->varying_count; ++i) ++ { ++ if (varying_map->varying_map[i].output_signature_index == signature_idx) ++ return &varying_map->varying_map[i]; ++ } ++ ++ return NULL; ++} ++ ++static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info) ++{ ++ const struct vkd3d_shader_spirv_target_info *spirv_info; ++ enum vkd3d_shader_spirv_environment environment; ++ ++ switch (info->target_type) ++ { ++ case VKD3D_SHADER_TARGET_SPIRV_BINARY: ++ spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO); ++ environment = spirv_info ? 
spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; ++ ++ switch (environment) ++ { ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: ++ return true; ++ ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: ++ /* FIXME: Allow KHR_maintenance4. */ ++ return false; ++ ++ default: ++ FIXME("Unrecognized environment %#x.\n", environment); ++ return false; ++ } ++ ++ default: ++ return true; ++ } ++} ++ ++static void remove_unread_output_components(const struct shader_signature *signature, ++ struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst) ++{ ++ const struct signature_element *e; ++ ++ switch (dst->reg.type) ++ { ++ case VKD3DSPR_OUTPUT: ++ e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); ++ break; ++ ++ case VKD3DSPR_ATTROUT: ++ e = vsir_signature_find_element_for_reg(signature, ++ SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); ++ break; ++ ++ case VKD3DSPR_RASTOUT: ++ e = vsir_signature_find_element_for_reg(signature, ++ SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); ++ break; ++ ++ default: ++ return; ++ } ++ ++ /* We already changed the mask earlier. 
*/ ++ dst->write_mask &= e->mask; ++ ++ if (!dst->write_mask) ++ { ++ if (ins->dst_count == 1) ++ vkd3d_shader_instruction_make_nop(ins); ++ else ++ vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); ++ } ++} ++ ++static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, + struct vsir_transformation_context *ctx) { - const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; + struct vkd3d_shader_message_context *message_context = ctx->message_context; + const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; ++ bool allows_subset_masks = target_allows_subset_masks(compile_info); struct shader_signature *signature = &program->output_signature; ++ unsigned int orig_element_count = signature->element_count; const struct vkd3d_shader_varying_map_info *varying_map; ++ struct signature_element *new_elements, *e; ++ unsigned int uninit_varying_count = 0; ++ unsigned int subset_varying_count = 0; ++ unsigned int new_register_count = 0; unsigned int i; -@@ -727,144 +985,20 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali + + if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) +@@ -554,22 +1027,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + for (i = 0; i < signature->element_count; ++i) + { + const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); +- struct signature_element *e = &signature->elements[i]; + ++ e = &signature->elements[i]; + if (map) + { + unsigned int input_mask = map->input_mask; + + e->target_location = map->input_register_index; + +- /* It is illegal in Vulkan if the next shader uses the same varying +- * location with a different mask. 
*/ +- if (input_mask && input_mask != e->mask) ++ if ((input_mask & e->mask) == input_mask) ++ { ++ ++subset_varying_count; ++ if (!allows_subset_masks) ++ { ++ e->mask = input_mask; ++ e->used_mask &= input_mask; ++ } ++ } ++ else if (input_mask && input_mask != e->mask) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " +- "Output mask %#x does not match input mask %#x.", +- e->mask, input_mask); ++ "Input mask %#x reads components not written in output mask %#x.", ++ input_mask, e->mask); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } +@@ -577,17 +1057,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + { + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; + } ++ ++ new_register_count = max(new_register_count, e->register_index + 1); + } + ++ /* Handle uninitialized varyings by writing them before every ret. ++ * ++ * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized ++ * varyings contain. ++ * ++ * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2. ++ * In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1). ++ * ++ * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1). ++ * WARP writes (1, 1, 1, 1). ++ * ++ * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1). ++ * ++ * We don't have enough knowledge to identify diffuse here. Instead we deal ++ * with that in vsir_program_ensure_diffuse(), by always writing diffuse if ++ * the shader doesn't. 
++ */ ++ + for (i = 0; i < varying_map->varying_count; ++i) + { + if (varying_map->varying_map[i].output_signature_index >= signature->element_count) ++ ++uninit_varying_count; ++ } ++ ++ if (!(new_elements = vkd3d_realloc(signature->elements, ++ (signature->element_count + uninit_varying_count) * sizeof(*signature->elements)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ signature->elements = new_elements; ++ ++ for (i = 0; i < varying_map->varying_count; ++i) ++ { ++ const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i]; ++ ++ if (map->output_signature_index < orig_element_count) ++ continue; ++ ++ TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n", ++ map->input_register_index, map->input_mask); ++ e = &signature->elements[signature->element_count++]; ++ memset(e, 0, sizeof(*e)); ++ e->sysval_semantic = VKD3D_SHADER_SV_NONE; ++ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ e->register_count = 1; ++ e->mask = map->input_mask; ++ e->used_mask = map->input_mask; ++ e->register_index = new_register_count++; ++ e->target_location = map->input_register_index; ++ e->interpolation_mode = VKD3DSIM_LINEAR; ++ } ++ ++ /* Write each uninitialized varying before each ret. 
*/ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ struct vkd3d_shader_location loc; ++ ++ if (ins->opcode != VKD3DSIH_RET) ++ continue; ++ ++ loc = ins->location; ++ if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[i]; ++ ++ for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) + { +- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, +- "Aborting due to not yet implemented feature: " +- "The next stage consumes varyings not written by this stage."); +- return VKD3D_ERROR_NOT_IMPLEMENTED; ++ e = &signature->elements[j]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask); ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ins; + } ++ ++ i += uninit_varying_count; ++ } ++ ++ /* Vulkan (without KHR_maintenance4) disallows any mismatching masks, ++ * including when the input mask is a proper subset of the output mask. ++ * Resolve this by rewriting the shader to remove unread components from ++ * any writes to the output variable. 
*/ ++ ++ if (!subset_varying_count || allows_subset_masks) ++ return VKD3D_OK; ++ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ remove_unread_output_components(signature, ins, &ins->dst[j]); + } + + return VKD3D_OK; +@@ -727,192 +1293,68 @@ static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normali return VKD3D_OK; } @@ -12043,69 +17128,101 @@ index 747238e2fee..b47f12d2188 100644 - param->modifiers = VKD3DSPDM_NONE; - param->shift = 0; -} -- ++ flattener.phase = VKD3DSIH_INVALID; ++ for (i = 0, locations.count = 0; i < instructions->count; ++i) ++ flattener_eliminate_phase_related_dcls(&flattener, i, &locations); + -void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) -{ - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); - param->reg.dimension = VSIR_DIMENSION_NONE; - param->reg.idx[0].offset = label_id; -} -- ++ if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) ++ return result; + -static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - src->reg.idx[0].offset = idx; -} -- ++ if (flattener.phase != VKD3DSIH_INVALID) ++ { ++ if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); ++ } + -static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); - src->reg.idx[0].offset = idx; --} -- ++ program->instructions = flattener.instructions; ++ return result; + } + -static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, 
unsigned int idx) --{ ++struct control_point_normaliser + { - vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; -} -- ++ struct vkd3d_shader_instruction_array instructions; ++ enum vkd3d_shader_opcode phase; ++ struct vkd3d_shader_src_param *outpointid_param; ++}; + -static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) --{ ++static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) + { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); - dst->reg.idx[0].offset = idx; --} -- ++ return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; + } + -static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) --{ ++struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program) + { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - dst->reg.idx[0].offset = idx; - dst->write_mask = VKD3DSP_WRITEMASK_0; -} -- ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ struct vkd3d_shader_src_param *rel_addr; + -static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src->reg.idx[0].offset = idx; -} -- ++ if (instructions->outpointid_param) ++ return instructions->outpointid_param; + -static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) -{ - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); - src->reg.idx[0].offset = idx; -} -- ++ if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) ++ return NULL; + -static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value) -{ - vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); - src->reg.u.immconst_u32[0] = value; -} -- ++ vsir_register_init(&rel_addr->reg, 
VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); ++ rel_addr->swizzle = 0; ++ rel_addr->modifiers = 0; + -static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -{ - vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); - src->reg.idx[0].offset = idx; --} -- ++ instructions->outpointid_param = rel_addr; ++ return rel_addr; + } + -void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode) -{ @@ -12144,34 +17261,60 @@ index 747238e2fee..b47f12d2188 100644 - flattener.phase = VKD3DSIH_INVALID; - for (i = 0, locations.count = 0; i < instructions->count; ++i) - flattener_eliminate_phase_related_dcls(&flattener, i, &locations); -+ flattener.phase = VKD3DSIH_INVALID; -+ for (i = 0, locations.count = 0; i < instructions->count; ++i) -+ flattener_eliminate_phase_related_dcls(&flattener, i, &locations); - - if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) - return result; -@@ -876,7 +1010,7 @@ static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd - vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); - } - +- +- if ((result = flattener_flatten_phases(&flattener, &locations)) < 0) +- return result; +- +- if (flattener.phase != VKD3DSIH_INVALID) +- { +- if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1)) +- return VKD3D_ERROR_OUT_OF_MEMORY; +- vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET); +- } +- - *src_instructions = flattener.instructions; -+ program->instructions = flattener.instructions; - return result; - } - -@@ -892,9 +1026,9 @@ static bool control_point_normaliser_is_in_control_point_phase(const struct cont - return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; - } - +- return result; +-} +- +-struct 
control_point_normaliser +-{ +- struct vkd3d_shader_instruction_array instructions; +- enum vkd3d_shader_opcode phase; +- struct vkd3d_shader_src_param *outpointid_param; +-}; +- +-static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser) +-{ +- return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; +-} +- -struct vkd3d_shader_src_param *instruction_array_create_outpointid_param( - struct vkd3d_shader_instruction_array *instructions) -+struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(struct vsir_program *program) +-{ +- struct vkd3d_shader_src_param *rel_addr; +- +- if (instructions->outpointid_param) +- return instructions->outpointid_param; +- +- if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1))) +- return NULL; +- +- vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0); +- rel_addr->swizzle = 0; +- rel_addr->modifiers = 0; +- +- instructions->outpointid_param = rel_addr; +- return rel_addr; +-} +- +-static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, +- struct control_point_normaliser *normaliser) ++static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param, ++ struct control_point_normaliser *normaliser) { -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; - struct vkd3d_shader_src_param *rel_addr; + struct vkd3d_shader_register *reg = &dst_param->reg; - if (instructions->outpointid_param) -@@ -991,7 +1125,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p +@@ -991,7 +1433,7 @@ static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_p } static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io( @@ -12180,16 +17323,16 @@ index 747238e2fee..b47f12d2188 100644 { struct vkd3d_shader_instruction_array *instructions; struct control_point_normaliser 
normaliser; -@@ -1001,12 +1135,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i +@@ -1001,12 +1443,20 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i enum vkd3d_result ret; unsigned int i, j; - if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions))) -+ VKD3D_ASSERT(!program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); + + if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) + { -+ program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + return VKD3D_OK; + } + @@ -12203,13 +17346,13 @@ index 747238e2fee..b47f12d2188 100644 instructions = &normaliser.instructions; normaliser.phase = VKD3DSIH_INVALID; -@@ -1043,22 +1185,25 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i +@@ -1043,22 +1493,25 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i input_control_point_count = ins->declaration.count; break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; -+ program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; return VKD3D_OK; case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: @@ -12220,7 +17363,7 @@ index 747238e2fee..b47f12d2188 100644 input_control_point_count, i, &location); - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; -+ program->normalised_hull_cp_io = true; ++ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; return ret; default: break; @@ -12229,11 +17372,11 @@ index 747238e2fee..b47f12d2188 100644 - *src_instructions = normaliser.instructions; + program->instructions = normaliser.instructions; -+ program->normalised_hull_cp_io = true; ++ 
program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; return VKD3D_OK; } -@@ -1098,36 +1243,35 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * +@@ -1098,36 +1551,35 @@ static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser * return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE; } @@ -12285,7 +17428,7 @@ index 747238e2fee..b47f12d2188 100644 } static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE], -@@ -1181,9 +1325,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, +@@ -1181,9 +1633,10 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, { const struct vkd3d_shader_index_range *range = &ins->declaration.index_range; const struct vkd3d_shader_register *reg = &range->dst.reg; @@ -12297,7 +17440,7 @@ index 747238e2fee..b47f12d2188 100644 switch (reg->type) { -@@ -1215,9 +1360,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, +@@ -1215,9 +1668,8 @@ static void io_normaliser_add_index_range(struct io_normaliser *normaliser, reg_idx = reg->idx[reg->idx_count - 1].offset; write_mask = range->dst.write_mask; @@ -12309,7 +17452,43 @@ index 747238e2fee..b47f12d2188 100644 } static int signature_element_mask_compare(const void *a, const void *b) -@@ -1345,6 +1489,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map +@@ -1276,8 +1728,34 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa + static int signature_element_register_compare(const void *a, const void *b) + { + const struct signature_element *e = a, *f = b; ++ int ret; ++ ++ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) ++ return ret; + +- return vkd3d_u32_compare(e->register_index, f->register_index); ++ /* System values like SV_RenderTargetArrayIndex and SV_ViewPortArrayIndex ++ * can get packed into the same I/O register as 
non-system values, but ++ * only at the end. E.g.: ++ * ++ * vs_4_0 ++ * ... ++ * .output ++ * ... ++ * .param B.x, o1.x, uint ++ * .param C.y, o1.y, uint ++ * .param SV_RenderTargetArrayIndex.z, o1.z, uint, RTINDEX ++ * .text ++ * ... ++ * mov o1.xy, v1.xyxx ++ * mov o1.z, v1.z ++ * ret ++ * ++ * Because I/O normalisation doesn't split writes like the mov to o1.xy ++ * above, we want to make sure that o1.x and o1.y continue to be packed ++ * into a single register after I/O normalisation, so we order system ++ * values after non-system values here, allowing the non-system values to ++ * get merged into a single register. */ ++ return vkd3d_u32_compare(f->sysval_semantic, e->sysval_semantic); + } + + static int signature_element_index_compare(const void *a, const void *b) +@@ -1345,6 +1823,9 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map return false; memcpy(elements, s->elements, element_count * sizeof(*elements)); @@ -12319,7 +17498,7 @@ index 747238e2fee..b47f12d2188 100644 qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare); for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e) -@@ -1388,6 +1535,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map +@@ -1388,6 +1869,8 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map else e->interpolation_mode = f->interpolation_mode; } @@ -12328,7 +17507,7 @@ index 747238e2fee..b47f12d2188 100644 } } element_count = new_count; -@@ -1415,6 +1564,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map +@@ -1415,6 +1898,12 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count); e->register_count = register_count; e->mask = signature_element_range_expand_mask(e, register_count, range_map); @@ -12341,7 +17520,16 
@@ index 747238e2fee..b47f12d2188 100644 } } element_count = new_count; -@@ -1518,6 +1673,9 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par +@@ -1470,6 +1959,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + const struct shader_signature *signature; + const struct signature_element *e; + ++ write_mask = dst_param->write_mask; ++ + switch (reg->type) + { + case VKD3DSPR_OUTPUT: +@@ -1518,10 +2009,17 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par break; case VKD3DSPR_RASTOUT: @@ -12351,10 +17539,19 @@ index 747238e2fee..b47f12d2188 100644 reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset; signature = normaliser->output_signature; reg->type = VKD3DSPR_OUTPUT; -@@ -1530,10 +1688,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + dcl_params = normaliser->output_dcl_params; ++ /* Fog and point size are scalar, but fxc/d3dcompiler emits a full ++ * write mask when writing to them. */ ++ if (reg->idx[0].offset > 0) ++ write_mask = VKD3DSP_WRITEMASK_0; + break; + + default: +@@ -1529,11 +2027,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + } id_idx = reg->idx_count - 1; - write_mask = dst_param->write_mask; +- write_mask = dst_param->write_mask; - element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask); + if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) + vkd3d_unreachable(); @@ -12364,7 +17561,7 @@ index 747238e2fee..b47f12d2188 100644 if (is_io_dcl) { /* Validated in the TPF reader. 
*/ -@@ -1653,7 +1811,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par +@@ -1653,7 +2150,8 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par id_idx = reg->idx_count - 1; write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0); @@ -12374,41 +17571,50 @@ index 747238e2fee..b47f12d2188 100644 e = &signature->elements[element_idx]; if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) -@@ -1737,13 +1896,13 @@ static bool use_flat_interpolation(const struct vsir_program *program, - if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "Unsupported flat interpolation parameter type %#x.\n", parameter->type); -+ "Unsupported flat interpolation parameter type %#x.", parameter->type); - return false; +@@ -1725,38 +2223,14 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi } - if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) - { - vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -- "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); -+ "Invalid flat interpolation parameter data type %#x.", parameter->data_type); - return false; - } - -@@ -1751,12 +1910,14 @@ static bool use_flat_interpolation(const struct vsir_program *program, } +-static bool use_flat_interpolation(const struct vsir_program *program, +- struct vkd3d_shader_message_context *message_context) +-{ +- static const struct vkd3d_shader_location no_loc; +- const struct vkd3d_shader_parameter1 *parameter; +- +- if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) +- return false; +- +- if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) +- { +- vkd3d_shader_error(message_context, &no_loc, 
VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, +- "Unsupported flat interpolation parameter type %#x.\n", parameter->type); +- return false; +- } +- if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) +- { +- vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, +- "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); +- return false; +- } +- +- return parameter->u.immediate_constant.u.u32; +-} +- static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) + struct vsir_transformation_context *ctx) { -+ struct vkd3d_shader_message_context *message_context = ctx->message_context; struct io_normaliser normaliser = {program->instructions}; struct vkd3d_shader_instruction *ins; - bool has_control_point_phase; - unsigned int i, j; + unsigned int i; + -+ VKD3D_ASSERT(!program->normalised_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; -@@ -1765,7 +1926,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1765,7 +2239,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.output_signature = &program->output_signature; normaliser.patch_constant_signature = &program->patch_constant_signature; @@ -12417,7 +17623,7 @@ index 747238e2fee..b47f12d2188 100644 { ins = &program->instructions.elements[i]; -@@ -1779,8 +1940,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1779,8 +2253,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program vkd3d_shader_instruction_make_nop(ins); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: @@ -12426,7 +17632,7 @@ index 747238e2fee..b47f12d2188 100644 case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: 
normaliser.phase = ins->opcode; -@@ -1790,22 +1949,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1790,22 +2262,6 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program } } @@ -12449,15 +17655,33 @@ index 747238e2fee..b47f12d2188 100644 if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) || !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) -@@ -1832,6 +1975,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program +@@ -1814,24 +2270,13 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + return VKD3D_ERROR_OUT_OF_MEMORY; + } + +- if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL +- && program->shader_version.major < 4 && use_flat_interpolation(program, message_context)) +- { +- for (i = 0; i < program->input_signature.element_count; ++i) +- { +- struct signature_element *element = &program->input_signature.elements[i]; +- +- if (!ascii_strcasecmp(element->semantic_name, "COLOR")) +- element->interpolation_mode = VKD3DSIM_CONSTANT; +- } +- } +- + normaliser.phase = VKD3DSIH_INVALID; + for (i = 0; i < normaliser.instructions.count; ++i) + shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); program->instructions = normaliser.instructions; program->use_vocp = normaliser.use_vocp; -+ program->normalised_io = true; ++ program->normalisation_level = VSIR_FULLY_NORMALISED_IO; return VKD3D_OK; } -@@ -1918,7 +2062,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par +@@ -1918,7 +2363,8 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par param->reg.idx_count = 3; } @@ -12467,7 +17691,7 @@ index 747238e2fee..b47f12d2188 100644 { struct flat_constants_normaliser 
normaliser = {0}; unsigned int i, j; -@@ -1957,7 +2102,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ +@@ -1957,7 +2403,8 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ return VKD3D_OK; } @@ -12477,7 +17701,7 @@ index 747238e2fee..b47f12d2188 100644 { size_t i, depth = 0; bool dead = false; -@@ -2045,103 +2191,6 @@ static void remove_dead_code(struct vsir_program *program) +@@ -2045,103 +2492,6 @@ static void remove_dead_code(struct vsir_program *program) break; } } @@ -12581,7 +17805,45 @@ index 747238e2fee..b47f12d2188 100644 return VKD3D_OK; } -@@ -2789,11 +2838,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte +@@ -2434,15 +2784,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla + static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, + struct vkd3d_shader_message_context *message_context) + { +- bool main_block_open, is_hull_shader, after_declarations_section; + struct vkd3d_shader_instruction_array *instructions; + struct vsir_program *program = flattener->program; ++ bool is_hull_shader, after_declarations_section; + struct vkd3d_shader_instruction *dst_ins; + size_t i; + + instructions = &program->instructions; + is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL; +- main_block_open = !is_hull_shader; + after_declarations_section = is_hull_shader; + + if (!cf_flattener_require_space(flattener, instructions->count + 1)) +@@ -2766,8 +3115,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + + if (cf_info) + cf_info->inside_block = false; +- else +- main_block_open = false; + break; + + default: +@@ -2777,23 +3124,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + } + } + +- if (main_block_open) +- { +- if (!(dst_ins = cf_flattener_require_space(flattener, 1))) +- return 
VKD3D_ERROR_OUT_OF_MEMORY; +- vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET); +- ++flattener->instruction_count; +- } +- + return flattener->status; } static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, @@ -12597,7 +17859,7 @@ index 747238e2fee..b47f12d2188 100644 if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { vkd3d_free(program->instructions.elements); -@@ -2801,6 +2853,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi +@@ -2801,6 +3143,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; program->block_count = flattener.block_id; @@ -12605,7 +17867,7 @@ index 747238e2fee..b47f12d2188 100644 } else { -@@ -2860,13 +2913,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i +@@ -2860,13 +3203,16 @@ static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_i return true; } @@ -12623,7 +17885,7 @@ index 747238e2fee..b47f12d2188 100644 if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) goto fail; -@@ -3050,7 +3106,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl +@@ -3050,7 +3396,8 @@ static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *bl vkd3d_free(block_info); } @@ -12633,7 +17895,7 @@ index 747238e2fee..b47f12d2188 100644 { size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i; struct ssas_to_temps_block_info *info, *block_info = NULL; -@@ -3058,6 +3115,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ +@@ -3058,6 +3405,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ struct ssas_to_temps_alloc alloc = {0}; unsigned int 
current_label = 0; @@ -12642,7 +17904,7 @@ index 747238e2fee..b47f12d2188 100644 if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info)))) { ERR("Failed to allocate block info array.\n"); -@@ -5271,12 +5330,15 @@ out: +@@ -5271,12 +5620,15 @@ out: } static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, @@ -12659,7 +17921,7 @@ index 747238e2fee..b47f12d2188 100644 target.jump_target_temp_idx = program->temp_count; target.temp_count = program->temp_count + 1; -@@ -5324,6 +5386,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, +@@ -5324,6 +5676,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, program->instructions.capacity = target.ins_capacity; program->instructions.count = target.ins_count; program->temp_count = target.temp_count; @@ -12667,7 +17929,7 @@ index 747238e2fee..b47f12d2188 100644 return VKD3D_OK; -@@ -5451,11 +5514,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f +@@ -5451,11 +5804,14 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps_in_f } static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program, @@ -12683,7 +17945,72 @@ index 747238e2fee..b47f12d2188 100644 for (i = 0; i < program->instructions.count;) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -@@ -5508,9 +5574,11 @@ static bool find_colour_signature_idx(const struct shader_signature *signature, +@@ -5491,26 +5847,67 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + return VKD3D_OK; + } + +-static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) ++static bool use_flat_interpolation(const struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context, bool *flat) + { +- for (unsigned int i = 0; i < signature->element_count; ++i) ++ static const 
struct vkd3d_shader_location no_loc; ++ const struct vkd3d_shader_parameter1 *parameter; ++ ++ *flat = false; ++ ++ if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) ++ return true; ++ ++ if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { +- if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET +- && !signature->elements[i].register_index) +- { +- *index = i; +- return true; +- } ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported flat interpolation parameter type %#x.", parameter->type); ++ return false; ++ } ++ if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid flat interpolation parameter data type %#x.", parameter->data_type); ++ return false; + } + +- return false; ++ *flat = parameter->u.immediate_constant.u.u32; ++ return true; ++} ++ ++static enum vkd3d_result vsir_program_apply_flat_interpolation(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ unsigned int i; ++ bool flat; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL || program->shader_version.major >= 4) ++ return VKD3D_OK; ++ ++ if (!use_flat_interpolation(program, ctx->message_context, &flat)) ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ ++ if (!flat) ++ return VKD3D_OK; ++ ++ for (i = 0; i < program->input_signature.element_count; ++i) ++ { ++ struct signature_element *element = &program->input_signature.elements[i]; ++ ++ if (!ascii_strcasecmp(element->semantic_name, "COLOR")) ++ element->interpolation_mode = VKD3DSIM_CONSTANT; ++ } ++ ++ return VKD3D_OK; + } static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, @@ -12696,7 +18023,7 @@ index 
747238e2fee..b47f12d2188 100644 size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; -@@ -5565,6 +5633,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr +@@ -5565,6 +5962,11 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); break; @@ -12708,7 +18035,7 @@ index 747238e2fee..b47f12d2188 100644 default: FIXME("Unhandled parameter data type %#x.\n", ref->data_type); return VKD3D_ERROR_NOT_IMPLEMENTED; -@@ -5596,8 +5669,9 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr +@@ -5596,12 +5998,13 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr } static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, @@ -12717,9 +18044,23 @@ index 747238e2fee..b47f12d2188 100644 { + struct vkd3d_shader_message_context *message_context = ctx->message_context; const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; ++ uint32_t colour_signature_idx, colour_temp = ~0u; static const struct vkd3d_shader_location no_loc; enum vkd3d_shader_comparison_func compare_func; -@@ -5620,13 +5694,13 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro +- uint32_t colour_signature_idx, colour_temp; + struct vkd3d_shader_instruction *ins; + size_t new_pos; + int ret; +@@ -5609,7 +6012,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; + +- if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) ++ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx) + || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) + return VKD3D_OK; + +@@ -5620,13 +6023,13 @@ static enum vkd3d_result 
vsir_program_insert_alpha_test(struct vsir_program *pro if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) { vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, @@ -12735,7 +18076,7 @@ index 747238e2fee..b47f12d2188 100644 return VKD3D_ERROR_INVALID_ARGUMENT; } compare_func = func->u.immediate_constant.u.u32; -@@ -5650,7 +5724,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro +@@ -5650,7 +6053,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro if (ins->opcode == VKD3DSIH_RET) { if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, @@ -12744,7 +18085,7 @@ index 747238e2fee..b47f12d2188 100644 return ret; i = new_pos; continue; -@@ -5677,456 +5751,1845 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro +@@ -5677,456 +6080,2081 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro return VKD3D_OK; } @@ -12768,24 +18109,19 @@ index 747238e2fee..b47f12d2188 100644 - CF_TYPE_BLOCKS, - } cf_type; - bool inside_block; -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int output_idx = 0; - +- - struct validation_context_temp_data - { - enum vsir_dimension dimension; - size_t first_seen; - } *temps; -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int output_idx = 0; - struct validation_context_ssa_data -+ ins = &program->instructions.elements[pos]; -+ -+ for (unsigned int i = 0; i < 8; ++i) - { +- { - enum vsir_dimension dimension; - enum vkd3d_data_type data_type; - size_t first_seen; @@ 
-12793,24 +18129,35 @@ index 747238e2fee..b47f12d2188 100644 - uint32_t read_mask; - size_t first_assigned; - } *ssas; -+ if (!(mask & (1u << i))) -+ continue; ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; - enum vkd3d_shader_opcode *blocks; - size_t depth; - size_t blocks_capacity; -}; -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); -+ src_param_init_temp_float4(&ins->src[0], position_temp); -+ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); -+ ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins = &program->instructions.elements[pos]; -static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, - enum vkd3d_shader_error error, const char *format, ...) -{ - struct vkd3d_string_buffer buf; - va_list args; ++ for (unsigned int i = 0; i < 8; ++i) ++ { ++ if (!(mask & (1u << i))) ++ continue; + +- vkd3d_string_buffer_init(&buf); ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); ++ src_param_init_temp_float4(&ins->src[0], position_temp); ++ src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); ++ ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; + +- va_start(args, format); +- vkd3d_string_buffer_vprintf(&buf, format, args); +- va_end(args); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + if (output_idx < 4) + ins->dst[0].reg.idx[0].offset = low_signature_idx; @@ -12820,13 +18167,21 @@ index 747238e2fee..b47f12d2188 100644 + ins->dst[0].write_mask = (1u << (output_idx % 4)); + ++output_idx; -- vkd3d_string_buffer_init(&buf); +- if (ctx->invalid_instruction_idx) +- { +- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", 
buf.buffer); +- ERR("VSIR validation error: %s\n", buf.buffer); +- } +- else +- { +- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; +- vkd3d_shader_error(ctx->message_context, &ins->location, error, +- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); +- ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); + ++ins; -+ } + } -- va_start(args, format); -- vkd3d_string_buffer_vprintf(&buf, format, args); -- va_end(args); +- vkd3d_string_buffer_cleanup(&buf); + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = position_signature_idx; @@ -12836,37 +18191,10 @@ index 747238e2fee..b47f12d2188 100644 + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -- if (ctx->invalid_instruction_idx) -- { -- vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); -- ERR("VSIR validation error: %s\n", buf.buffer); -- } -- else -+ *ret_pos = pos + vkd3d_popcount(mask) + 1; -+ return VKD3D_OK; -+} -+ -+static bool find_sysval_signature_idx(const struct shader_signature *signature, -+ enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) -+{ -+ for (unsigned int i = 0; i < signature->element_count; ++i) - { -- const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; -- vkd3d_shader_error(ctx->message_context, &ins->location, error, -- "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); -- ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); -+ if (signature->elements[i].sysval_semantic == sysval) -+ { -+ *idx = i; -+ return true; -+ } - } - -- vkd3d_string_buffer_cleanup(&buf); -- - if (!ctx->status) - ctx->status = VKD3D_ERROR_INVALID_SHADER; -+ return false; ++ 
*ret_pos = pos + vkd3d_popcount(mask) + 1; ++ return VKD3D_OK; } -static void vsir_validate_src_param(struct validation_context *ctx, @@ -12928,13 +18256,13 @@ index 747238e2fee..b47f12d2188 100644 + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported clip plane mask parameter type %#x.", mask_parameter->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } + } + if (mask_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid clip plane mask parameter data type %#x.", mask_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; - } ++ } + mask = mask_parameter->u.immediate_constant.u.u32; - switch (reg->type) @@ -12959,7 +18287,7 @@ index 747238e2fee..b47f12d2188 100644 - reg->idx_count); - break; - } -+ if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) ++ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx)) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, + "Shader does not write position."); @@ -13080,6 +18408,15 @@ index 747238e2fee..b47f12d2188 100644 - reg->idx[0].offset, ctx->program->ssa_count); - break; - } ++static bool is_pre_rasterization_shader(enum vkd3d_shader_type type) ++{ ++ return type == VKD3D_SHADER_TYPE_VERTEX ++ || type == VKD3D_SHADER_TYPE_HULL ++ || type == VKD3D_SHADER_TYPE_DOMAIN ++ || type == VKD3D_SHADER_TYPE_GEOMETRY; ++} + +- data = &ctx->ssas[reg->idx[0].offset]; +static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, size_t *ret_pos) +{ @@ -13087,16 +18424,13 @@ index 747238e2fee..b47f12d2188 100644 + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; -- data = &ctx->ssas[reg->idx[0].offset]; -+ if 
(!shader_instruction_array_insert_at(&program->instructions, pos, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; - - if (reg->dimension == VSIR_DIMENSION_NONE) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register."); - break; - } -+ ins = &program->instructions.elements[pos]; ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; - /* SSA registers can be scalar or vec4, provided that each - * individual register always appears with the same @@ -13121,44 +18455,40 @@ index 747238e2fee..b47f12d2188 100644 - } - break; - } -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; -+ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); ++ ins = &program->instructions.elements[pos]; - case VKD3DSPR_LABEL: - if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", - reg->precision); -+ *ret_pos = pos + 1; -+ return VKD3D_OK; -+} ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; ++ src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); - if (reg->data_type != VKD3D_DATA_UNUSED) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", - reg->data_type); ++ *ret_pos = pos + 1; ++ return VKD3D_OK; ++} + +- if (reg->dimension != VSIR_DIMENSION_NONE) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.", +- 
reg->dimension); +static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + const struct vkd3d_shader_parameter1 *size_parameter = NULL; + static const struct vkd3d_shader_location no_loc; -- if (reg->dimension != VSIR_DIMENSION_NONE) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.", -- reg->dimension); -+ if (program->has_point_size) -+ return VKD3D_OK; - - if (reg->idx_count != 1) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.", - reg->idx_count); - break; - } -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -+ && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY -+ && program->shader_version.type != VKD3D_SHADER_TYPE_HULL -+ && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) ++ if (program->has_point_size) + return VKD3D_OK; - if (reg->idx[0].rel_addr) @@ -13172,41 +18502,44 @@ index 747238e2fee..b47f12d2188 100644 - "LABEL register index %u exceeds the maximum count %u.", - reg->idx[0].offset, ctx->program->block_count); - break; -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; ++ if (!is_pre_rasterization_shader(program->shader_version.type)) ++ return VKD3D_OK; - case VKD3DSPR_NULL: - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.", - reg->idx_count); - break; -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE) -+ size_parameter = parameter; -+ } ++ for (unsigned int i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - case VKD3DSPR_IMMCONST: - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid 
index count %u for a IMMCONST register.", - reg->idx_count); - break; -+ if (!size_parameter) -+ return VKD3D_OK; ++ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE) ++ size_parameter = parameter; ++ } - case VKD3DSPR_IMMCONST64: - if (reg->idx_count != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.", - reg->idx_count); - break; ++ if (!size_parameter) ++ return VKD3D_OK; + +- default: +- break; + if (size_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid point size parameter data type %#x.", size_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } - -- default: -- break; ++ + program->has_point_size = true; + + /* Append a point size write before each ret. */ @@ -13250,10 +18583,7 @@ index 747238e2fee..b47f12d2188 100644 - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.", - dst->write_mask); - break; -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -+ && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY -+ && program->shader_version.type != VKD3D_SHADER_TYPE_HULL -+ && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) ++ if (!is_pre_rasterization_shader(program->shader_version.type)) + return VKD3D_OK; - case VSIR_DIMENSION_VEC4: @@ -13288,50 +18618,27 @@ index 747238e2fee..b47f12d2188 100644 - case 1: - case 2: - case 3: -- case 13: -- case 14: -- case 15: -- break; + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid minimum point size parameter data type %#x.", min_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } - -- default: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", -- dst->shift); ++ + if (max_parameter && 
max_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid maximum point size parameter data type %#x.", max_parameter->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; - } - -- switch (dst->reg.type) ++ } ++ + /* Replace writes to the point size by inserting a clamp before each write. */ + + for (size_t i = 0; i < program->instructions.count; ++i) - { -- case VKD3DSPR_SSA: -- if (dst->reg.idx[0].offset < ctx->program->ssa_count) -- { -- struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; ++ { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + const struct vkd3d_shader_location *loc; + unsigned int ssa_value; + bool clamp = false; - -- if (data->write_mask == 0) -- { -- data->write_mask = dst->write_mask; -- data->first_assigned = ctx->instruction_idx; -- } -- else -- { -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, -- "SSA register is already assigned at instruction %zu.", -- data->first_assigned); -- } ++ + if (vsir_instruction_is_dcl(ins)) + continue; + @@ -13345,28 +18652,15 @@ index 747238e2fee..b47f12d2188 100644 + dst_param_init_ssa_float(dst, program->ssa_count); + ssa_value = program->ssa_count++; + clamp = true; - } -- break; ++ } + } - -- case VKD3DSPR_IMMCONST: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid IMMCONST register used as destination parameter."); -- break; ++ + if (!clamp) + continue; - -- case VKD3DSPR_IMMCONST64: -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, -- "Invalid IMMCONST64 register used as destination parameter."); -- break; ++ + if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) + return VKD3D_ERROR_OUT_OF_MEMORY; - -- default: -- break; -- } --} ++ + loc = &program->instructions.elements[i].location; + ins = 
&program->instructions.elements[i + 1]; + @@ -13404,6 +18698,192 @@ index 747238e2fee..b47f12d2188 100644 + return VKD3D_OK; +} + ++static bool has_texcoord_signature_element(const struct shader_signature *signature) ++{ ++ for (size_t i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(signature->elements[i].semantic_name, "TEXCOORD")) ++ return true; ++ } ++ return false; ++} ++ ++/* Returns true if replacement was done. */ ++static bool replace_texcoord_with_point_coord(struct vsir_program *program, ++ struct vkd3d_shader_src_param *src, unsigned int coord_temp) ++{ ++ uint32_t prev_swizzle = src->swizzle; ++ const struct signature_element *e; ++ ++ /* The input semantic may have a nontrivial mask, which we need to ++ * correct for. E.g. if the mask is .yz, and we read from .y, that needs ++ * to become .x. */ ++ static const uint32_t inverse_swizzles[16] = ++ { ++ /* Use _ for "undefined" components, for clarity. */ ++#define VKD3D_SHADER_SWIZZLE__ VKD3D_SHADER_SWIZZLE_X ++ 0, ++ /* .x */ VKD3D_SHADER_SWIZZLE(X, _, _, _), ++ /* .y */ VKD3D_SHADER_SWIZZLE(_, X, _, _), ++ /* .xy */ VKD3D_SHADER_SWIZZLE(X, Y, _, _), ++ /* .z */ VKD3D_SHADER_SWIZZLE(_, _, X, _), ++ /* .xz */ VKD3D_SHADER_SWIZZLE(X, _, Y, _), ++ /* .yz */ VKD3D_SHADER_SWIZZLE(_, X, Y, _), ++ /* .xyz */ VKD3D_SHADER_SWIZZLE(X, Y, Z, _), ++ /* .w */ VKD3D_SHADER_SWIZZLE(_, _, _, X), ++ /* .xw */ VKD3D_SHADER_SWIZZLE(X, _, _, Y), ++ /* .yw */ VKD3D_SHADER_SWIZZLE(_, X, _, Y), ++ /* .xyw */ VKD3D_SHADER_SWIZZLE(X, Y, _, Z), ++ /* .zw */ VKD3D_SHADER_SWIZZLE(_, _, X, Y), ++ /* .xzw */ VKD3D_SHADER_SWIZZLE(X, _, Y, Z), ++ /* .yzw */ VKD3D_SHADER_SWIZZLE(_, X, Y, Z), ++ /* .xyzw */ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), ++#undef VKD3D_SHADER_SWIZZLE__ ++ }; ++ ++ if (src->reg.type != VKD3DSPR_INPUT) ++ return false; ++ e = &program->input_signature.elements[src->reg.idx[0].offset]; ++ ++ if (ascii_strcasecmp(e->semantic_name, "TEXCOORD")) ++ return false; ++ ++ src->reg.type = VKD3DSPR_TEMP; 
++ src->reg.idx[0].offset = coord_temp; ++ ++ /* If the mask is already contiguous and zero-based, no need to remap ++ * the swizzle. */ ++ if (!(e->mask & (e->mask + 1))) ++ return true; ++ ++ src->swizzle = 0; ++ for (unsigned int i = 0; i < 4; ++i) ++ { ++ src->swizzle |= vsir_swizzle_get_component(inverse_swizzles[e->mask], ++ vsir_swizzle_get_component(prev_swizzle, i)) << VKD3D_SHADER_SWIZZLE_SHIFT(i); ++ } ++ ++ return true; ++} ++ ++static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ const struct vkd3d_shader_parameter1 *sprite_parameter = NULL; ++ static const struct vkd3d_shader_location no_loc; ++ struct vkd3d_shader_instruction *ins; ++ bool used_texcoord = false; ++ unsigned int coord_temp; ++ size_t i, insert_pos; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ for (i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; ++ ++ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE) ++ sprite_parameter = parameter; ++ } ++ ++ if (!sprite_parameter) ++ return VKD3D_OK; ++ ++ if (sprite_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported point sprite parameter type %#x.", sprite_parameter->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (sprite_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid point sprite parameter data type %#x.", sprite_parameter->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ if (!sprite_parameter->u.immediate_constant.u.u32) ++ return VKD3D_OK; ++ ++ if (!has_texcoord_signature_element(&program->input_signature)) ++ return VKD3D_OK; ++ ++ /* 
VKD3DSPR_POINTCOORD is a two-component value; fill the remaining two ++ * components with zeroes. */ ++ coord_temp = program->temp_count++; ++ ++ /* Construct the new temp after all LABEL, DCL, and NOP instructions. ++ * We need to skip NOP instructions because they might result from removed ++ * DCLs, and there could still be DCLs after NOPs. */ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) ++ break; ++ } ++ ++ insert_pos = i; ++ ++ /* Replace each texcoord read with a read from the point coord. */ ++ for (; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ for (unsigned int j = 0; j < ins->src_count; ++j) ++ { ++ used_texcoord |= replace_texcoord_with_point_coord(program, &ins->src[j], coord_temp); ++ ++ for (unsigned int k = 0; k < ins->src[j].reg.idx_count; ++k) ++ { ++ if (ins->src[j].reg.idx[k].rel_addr) ++ used_texcoord |= replace_texcoord_with_point_coord(program, ++ ins->src[j].reg.idx[k].rel_addr, coord_temp); ++ } ++ } ++ ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ { ++ for (unsigned int k = 0; k < ins->dst[j].reg.idx_count; ++k) ++ { ++ if (ins->dst[j].reg.idx[k].rel_addr) ++ used_texcoord |= replace_texcoord_with_point_coord(program, ++ ins->dst[j].reg.idx[k].rel_addr, coord_temp); ++ } ++ } ++ } ++ ++ if (used_texcoord) ++ { ++ if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[insert_pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_temp_float4(&ins->dst[0], coord_temp); ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); 
++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ins; ++ ++ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_temp_float4(&ins->dst[0], coord_temp); ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3; ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ins; ++ ++ program->has_point_coord = true; ++ } ++ ++ return VKD3D_OK; ++} ++ +struct validation_context +{ + struct vkd3d_shader_message_context *message_context; @@ -13435,6 +18915,9 @@ index 747238e2fee..b47f12d2188 100644 + enum vkd3d_shader_opcode *blocks; + size_t depth; + size_t blocks_capacity; ++ ++ unsigned int outer_tess_idxs[4]; ++ unsigned int inner_tess_idxs[2]; +}; + +static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx, @@ -13505,15 +18988,11 @@ index 747238e2fee..b47f12d2188 100644 + switch (ctx->program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: -+ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE ++ || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) + { + signature = &ctx->program->output_signature; -+ has_control_point = ctx->program->normalised_hull_cp_io; -+ } -+ else if (ctx->program->normalised_io) -+ { -+ signature = &ctx->program->output_signature; -+ has_control_point = true; ++ has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; + } + else + { @@ -13545,7 +19024,7 @@ index 747238e2fee..b47f12d2188 100644 + vkd3d_unreachable(); + } + -+ if (!ctx->program->normalised_io) ++ if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) + { + /* Indices are [register] or [control point, register]. Both are + * allowed to have a relative address. 
*/ @@ -14043,12 +19522,14 @@ index 747238e2fee..b47f12d2188 100644 + case 1: + case 2: + case 3: -+ case 13: -+ case 14: -+ case 15: -+ break; -+ -+ default: + case 13: + case 14: + case 15: + break; + + default: +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", +- dst->shift); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.", + dst->shift); + } @@ -14093,11 +19574,10 @@ index 747238e2fee..b47f12d2188 100644 + break; + } +} - - static void vsir_validate_src_param(struct validation_context *ctx, - const struct vkd3d_shader_src_param *src) - { -- vsir_validate_register(ctx, &src->reg); ++ ++static void vsir_validate_src_param(struct validation_context *ctx, ++ const struct vkd3d_shader_src_param *src) ++{ + vsir_validate_register(ctx, &src->reg); + + if (src->swizzle & ~0x03030303u) @@ -14157,10 +19637,7 @@ index 747238e2fee..b47f12d2188 100644 + "Invalid destination count %u for an instruction of type %#x, expected %u.", + instruction->dst_count, instruction->opcode, count); +} - -- if (src->swizzle & ~0x03030303u) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", -- src->swizzle); ++ +static void vsir_validate_src_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, unsigned int count) +{ @@ -14169,10 +19646,7 @@ index 747238e2fee..b47f12d2188 100644 + "Invalid source count %u for an instruction of type %#x, expected %u.", + instruction->src_count, instruction->opcode, count); +} - -- if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", -- src->reg.dimension, src->swizzle); ++ +static bool vsir_validate_src_min_count(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, unsigned int count) +{ @@ -14183,10 +19657,7 @@ 
index 747238e2fee..b47f12d2188 100644 + instruction->src_count, instruction->opcode, count); + return false; + } - -- if (src->modifiers >= VKD3DSPSM_COUNT) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", -- src->modifiers); ++ + return true; +} + @@ -14253,9 +19724,11 @@ index 747238e2fee..b47f12d2188 100644 + const struct shader_signature *signature, enum vsir_signature_type signature_type, + unsigned int idx) +{ ++ enum vkd3d_tessellator_domain expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_INVALID; + const char *signature_type_name = signature_type_names[signature_type]; + const struct signature_element *element = &signature->elements[idx]; -+ bool integer_type = false; ++ bool integer_type = false, is_outer = false; ++ unsigned int semantic_index_max = 0; + + if (element->register_count == 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, @@ -14307,12 +19780,6 @@ index 747238e2fee..b47f12d2188 100644 + case VKD3D_SHADER_SV_INSTANCE_ID: + case VKD3D_SHADER_SV_IS_FRONT_FACE: + case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: -+ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: -+ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: -+ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: + case VKD3D_SHADER_SV_TARGET: + case VKD3D_SHADER_SV_DEPTH: + case VKD3D_SHADER_SV_COVERAGE: @@ -14321,11 +19788,77 @@ index 747238e2fee..b47f12d2188 100644 + case VKD3D_SHADER_SV_STENCIL_REF: + break; + ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; ++ semantic_index_max = 4; ++ is_outer = true; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_QUAD; ++ semantic_index_max = 2; ++ is_outer = false; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: ++ expected_tess_domain = 
VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; ++ semantic_index_max = 3; ++ is_outer = true; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE; ++ semantic_index_max = 1; ++ is_outer = false; ++ break; ++ ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: ++ case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: ++ expected_tess_domain = VKD3D_TESSELLATOR_DOMAIN_LINE; ++ semantic_index_max = 2; ++ is_outer = true; ++ break; ++ + default: + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x.", + idx, signature_type_name, element->sysval_semantic); + break; + } + +- switch (dst->reg.type) ++ if (expected_tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) + { +- case VKD3DSPR_SSA: +- if (dst->reg.idx[0].offset < ctx->program->ssa_count) ++ if (signature_type != SIGNATURE_TYPE_PATCH_CONSTANT) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: System value semantic %#x is only valid " ++ "in the patch constant signature.", ++ idx, signature_type_name, element->sysval_semantic); ++ ++ if (ctx->program->tess_domain != expected_tess_domain) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid system value semantic %#x for tessellator domain %#x.", ++ idx, signature_type_name, element->sysval_semantic, ctx->program->tess_domain); ++ ++ if (element->semantic_index >= semantic_index_max) ++ { ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Invalid semantic index %u for system value semantic %#x.", ++ idx, signature_type_name, element->semantic_index, element->sysval_semantic); ++ } ++ else ++ { ++ unsigned int *idx_pos = &(is_outer ? 
ctx->outer_tess_idxs : ctx->inner_tess_idxs)[element->semantic_index]; ++ ++ if (*idx_pos != ~0u) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, ++ "element %u of %s signature: Duplicate semantic index %u for system value semantic %#x.", ++ idx, signature_type_name, element->semantic_index, element->sysval_semantic); ++ else ++ *idx_pos = idx; ++ } + } + + if (element->sysval_semantic < ARRAY_SIZE(sysval_validation_data)) @@ -14337,11 +19870,23 @@ index 747238e2fee..b47f12d2188 100644 + unsigned int mask; + + switch (signature_type) -+ { + { +- struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; + case SIGNATURE_TYPE_INPUT: + mask = data->input; + break; -+ + +- if (data->write_mask == 0) +- { +- data->write_mask = dst->write_mask; +- data->first_assigned = ctx->instruction_idx; +- } +- else +- { +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE, +- "SSA register is already assigned at instruction %zu.", +- data->first_assigned); +- } + case SIGNATURE_TYPE_OUTPUT: + mask = data->output; + break; @@ -14352,8 +19897,12 @@ index 747238e2fee..b47f12d2188 100644 + + default: + vkd3d_unreachable(); -+ } -+ + } +- break; + +- case VKD3DSPR_IMMCONST: +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid IMMCONST register used as destination parameter."); + if (!(mask & (1u << ctx->program->shader_version.type))) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid system value semantic %#x.", @@ -14379,17 +19928,20 @@ index 747238e2fee..b47f12d2188 100644 + case VKD3D_SHADER_COMPONENT_INT: + case VKD3D_SHADER_COMPONENT_UINT: + integer_type = true; -+ break; -+ + break; + +- case VKD3DSPR_IMMCONST64: +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, +- "Invalid IMMCONST64 register used as destination parameter."); + case VKD3D_SHADER_COMPONENT_FLOAT: -+ break; -+ -+ default: + break; + + default: + 
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid component type %#x.", + idx, signature_type_name, element->component_type); -+ break; -+ } + break; + } + + if (element->min_precision >= VKD3D_SHADER_MINIMUM_PRECISION_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, @@ -14406,33 +19958,51 @@ index 747238e2fee..b47f12d2188 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "element %u of %s signature: Invalid interpolation mode %#x for integer component type.", + idx, signature_type_name, element->interpolation_mode); -+} -+ + } + +-static void vsir_validate_src_param(struct validation_context *ctx, +- const struct vkd3d_shader_src_param *src) +static const unsigned int allowed_signature_phases[] = -+{ + { +- vsir_validate_register(ctx, &src->reg); + [SIGNATURE_TYPE_INPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, + [SIGNATURE_TYPE_OUTPUT] = PS_BIT | VS_BIT | GS_BIT | HS_BIT | DS_BIT, + [SIGNATURE_TYPE_PATCH_CONSTANT] = HS_BIT | DS_BIT, +}; -+ + +- if (src->swizzle & ~0x03030303u) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.", +- src->swizzle); +static void vsir_validate_signature(struct validation_context *ctx, + const struct shader_signature *signature, enum vsir_signature_type signature_type) +{ + unsigned int i; -+ + +- if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.", +- src->reg.dimension, src->swizzle); + if (signature->element_count != 0 && !(allowed_signature_phases[signature_type] + & (1u << ctx->program->shader_version.type))) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Unexpected %s signature.", signature_type_names[signature_type]); -+ + +- if (src->modifiers >= VKD3DSPSM_COUNT) +- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.", +- src->modifiers); + for (i = 0; i < signature->element_count; ++i) + vsir_validate_signature_element(ctx, signature, signature_type, i); +} -+ + +- switch (src->reg.type) +static const char *name_from_cf_type(enum vsir_control_flow_type type) +{ + switch (type) -+ { + { +- case VKD3DSPR_SSA: +- if (src->reg.idx[0].offset < ctx->program->ssa_count) +- { +- struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; +- unsigned int i; + case VSIR_CF_STRUCTURED: + return "structured"; + case VSIR_CF_BLOCKS: @@ -14585,6 +20155,11 @@ index 747238e2fee..b47f12d2188 100644 + || instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, + "Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain); ++ ++ if (instruction->declaration.tessellator_domain != ctx->program->tess_domain) ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "DCL_TESSELLATOR_DOMAIN argument %#x doesn't match the shader tessellator domain %#x.", ++ instruction->declaration.tessellator_domain, ctx->program->tess_domain); +} + +static void vsir_validate_dcl_tessellator_output_primitive(struct validation_context *ctx, @@ -14718,30 +20293,18 @@ index 747238e2fee..b47f12d2188 100644 + "Invalid source count %u for a PHI instruction, it must be an even number.", + instruction->src_count); + incoming_count = instruction->src_count / 2; - -- switch (src->reg.type) ++ + for (i = 0; i < incoming_count; ++i) - { -- case VKD3DSPR_SSA: -- if (src->reg.idx[0].offset < ctx->program->ssa_count) -- { -- struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; -- unsigned int i; ++ { + unsigned int value_idx = 2 * i; + unsigned int label_idx = 2 * i + 1; - -- for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -- data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, 
i)); -- } -- break; ++ + if (!register_is_constant_or_undef(&instruction->src[value_idx].reg) + && !register_is_ssa(&instruction->src[value_idx].reg)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid value register for incoming %u of type %#x in PHI instruction, " + "expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type); - -- default: -- break; ++ + if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid value dimension %#x for incoming %u in PHI instruction, expected scalar.", @@ -14751,7 +20314,7 @@ index 747238e2fee..b47f12d2188 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Invalid label register for case %u of type %#x in PHI instruction, " + "expected LABEL.", i, instruction->src[value_idx].reg.type); - } ++ } + + if (instruction->dst_count < 1) + return; @@ -14765,12 +20328,19 @@ index 747238e2fee..b47f12d2188 100644 + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, + "Invalid destination dimension %#x in PHI instruction, expected scalar.", + instruction->dst[0].reg.dimension); -+ + +- for (i = 0; i < VKD3D_VEC4_SIZE; ++i) +- data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i)); +- } +- break; + if (instruction->dst[0].modifiers != VKD3DSPDM_NONE) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, + "Invalid modifiers %#x for the destination of a PHI instruction, expected none.", + instruction->dst[0].modifiers); -+ + +- default: +- break; +- } + if (instruction->dst[0].shift != 0) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, + "Invalid shift %#x for the destination of a PHI instruction, expected none.", @@ -14936,7 +20506,7 @@ index 747238e2fee..b47f12d2188 100644 static void vsir_validate_instruction(struct validation_context *ctx) { -@@ -6148,136 +7611,40 @@ static void vsir_validate_instruction(struct 
validation_context *ctx) +@@ -6148,136 +8176,40 @@ static void vsir_validate_instruction(struct validation_context *ctx) instruction->opcode); } @@ -15096,7 +20666,7 @@ index 747238e2fee..b47f12d2188 100644 validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", instruction->opcode); -@@ -6285,271 +7652,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) +@@ -6285,271 +8217,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) } } @@ -15265,7 +20835,8 @@ index 747238e2fee..b47f12d2188 100644 - case VKD3DSIH_SWITCH_MONOLITHIC: - { - unsigned int case_count; -- ++ const struct vsir_validator_instruction_desc *desc; + - vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS); - vsir_validate_dst_count(ctx, instruction, 0); - /* Parameters are source, default label, merge label and @@ -15276,8 +20847,7 @@ index 747238e2fee..b47f12d2188 100644 - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.", - instruction->src_count); -+ const struct vsir_validator_instruction_desc *desc; - +- - if (!vsir_register_is_label(&instruction->src[1].reg)) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, - "Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.", @@ -15377,11 +20947,17 @@ index 747238e2fee..b47f12d2188 100644 } } -@@ -6563,19 +7679,71 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c +@@ -6563,19 +8244,84 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c .null_location = {.source_name = source_name}, .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, + .invalid_instruction_idx = true, ++ .outer_tess_idxs[0] = ~0u, ++ .outer_tess_idxs[1] = ~0u, ++ .outer_tess_idxs[2] = ~0u, ++ .outer_tess_idxs[3] = ~0u, ++ .inner_tess_idxs[0] = ~0u, ++ 
.inner_tess_idxs[1] = ~0u, }; unsigned int i; @@ -15392,18 +20968,28 @@ index 747238e2fee..b47f12d2188 100644 + { + case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: ++ if (program->tess_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID ++ || program->tess_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Invalid tessellation domain %#x.", program->tess_domain); + break; + + default: + if (program->patch_constant_signature.element_count != 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Patch constant signature is only valid for hull and domain shaders."); ++ ++ if (program->tess_domain != VKD3D_TESSELLATOR_DOMAIN_INVALID) ++ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, ++ "Invalid tessellation domain %#x.", program->tess_domain); + } + + switch (program->shader_version.type) + { -+ case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_DOMAIN: ++ break; ++ ++ case VKD3D_SHADER_TYPE_HULL: + case VKD3D_SHADER_TYPE_GEOMETRY: + if (program->input_control_point_count == 0) + validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, @@ -15420,9 +21006,6 @@ index 747238e2fee..b47f12d2188 100644 + switch (program->shader_version.type) + { + case VKD3D_SHADER_TYPE_HULL: -+ if (program->output_control_point_count == 0) -+ validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -+ "Invalid zero output control point count."); + break; + + default: @@ -15450,7 +21033,7 @@ index 747238e2fee..b47f12d2188 100644 vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; -@@ -6610,74 +7778,75 @@ fail: +@@ -6610,74 +8356,107 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } @@ -15462,23 +21045,18 @@ index 747238e2fee..b47f12d2188 100644 + enum vkd3d_result (*step)(struct vsir_program *program, struct vsir_transformation_context *ctx)) { - enum vkd3d_result result = VKD3D_OK; -- -- if ((result = vsir_program_lower_instructions(program, 
message_context)) < 0) -- return result; + if (ctx->result < 0) + return; -- if (program->shader_version.major >= 6) +- if ((result = vsir_program_lower_instructions(program, message_context)) < 0) +- return result; + if ((ctx->result = step(ctx->program, ctx)) < 0) - { -- if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) -- return result; ++ { + WARN("Transformation \"%s\" failed with result %d.\n", step_name, ctx->result); + return; + } -- if ((result = lower_switch_to_if_ladder(program)) < 0) -- return result; +- if (program->shader_version.major >= 6) + if ((ctx->result = vsir_program_validate(ctx->program, ctx->config_flags, + ctx->compile_info->source_name, ctx->message_context)) < 0) + { @@ -15486,9 +21064,38 @@ index 747238e2fee..b47f12d2188 100644 + return; + } +} ++ ++/* Transformations which should happen at parse time, i.e. before scan ++ * information is returned to the user. ++ * ++ * In particular, some passes need to modify the signature, and ++ * vkd3d_shader_scan() should report the modified signature for the given ++ * target. */ ++enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vsir_transformation_context ctx = + { +- if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0) +- return result; ++ .result = VKD3D_OK, ++ .program = program, ++ .config_flags = config_flags, ++ .compile_info = compile_info, ++ .message_context = message_context, ++ }; + +- if ((result = lower_switch_to_if_ladder(program)) < 0) +- return result; ++ /* For vsir_program_ensure_diffuse(). 
*/ ++ if (program->shader_version.major <= 2) ++ vsir_transform(&ctx, vsir_program_add_diffuse_output); - if ((result = vsir_program_structurize(program, message_context)) < 0) - return result; ++ return ctx.result; ++} ++ +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ @@ -15517,18 +21124,18 @@ index 747238e2fee..b47f12d2188 100644 } else { - if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) +- if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - { - if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) - return result; - } -+ vsir_transform(&ctx, vsir_program_remap_output_signature); - - if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) +- +- if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) - { - if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) - return result; -- ++ vsir_transform(&ctx, vsir_program_ensure_ret); + - if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, - &program->input_signature)) < 0) - return result; @@ -15536,11 +21143,16 @@ index 747238e2fee..b47f12d2188 100644 - - if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) - return result; -- ++ if (program->shader_version.major <= 2) ++ vsir_transform(&ctx, vsir_program_ensure_diffuse); + - if ((result = instruction_array_normalise_flat_constants(program)) < 0) - return result; -- ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ vsir_transform(&ctx, vsir_program_remap_output_signature); + - remove_dead_code(program); ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) + vsir_transform(&ctx, vsir_program_flatten_hull_shader_phases); - if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) 
@@ -15559,10 +21171,12 @@ index 747238e2fee..b47f12d2188 100644 - if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) - return result; ++ vsir_transform(&ctx, vsir_program_apply_flat_interpolation); + vsir_transform(&ctx, vsir_program_insert_alpha_test); + vsir_transform(&ctx, vsir_program_insert_clip_planes); + vsir_transform(&ctx, vsir_program_insert_point_size); + vsir_transform(&ctx, vsir_program_insert_point_size_clamp); ++ vsir_transform(&ctx, vsir_program_insert_point_coord); if (TRACE_ON()) - vkd3d_shader_trace(program); @@ -15577,10 +21191,10 @@ index 747238e2fee..b47f12d2188 100644 } diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c new file mode 100644 -index 00000000000..5baefbc1f44 +index 00000000000..df3edeaa4e6 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -0,0 +1,881 @@ +@@ -0,0 +1,898 @@ +/* + * Copyright 2024 Feifan He for CodeWeavers + * @@ -15622,6 +21236,8 @@ index 00000000000..5baefbc1f44 + struct vkd3d_shader_message_context *message_context; + unsigned int indent; + const char *prefix; ++ bool failed; ++ + const struct vkd3d_shader_interface_info *interface_info; + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info; +}; @@ -15634,6 +21250,7 @@ index 00000000000..5baefbc1f44 + va_start(args, fmt); + vkd3d_shader_verror(gen->message_context, &gen->location, error, fmt, args); + va_end(args); ++ gen->failed = true; +} + +static const char *msl_get_prefix(enum vkd3d_shader_type type) @@ -15895,6 +21512,9 @@ index 00000000000..5baefbc1f44 + + switch (ins->opcode) + { ++ case VKD3DSIH_DCL_INPUT: ++ case VKD3DSIH_DCL_OUTPUT: ++ case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_NOP: + break; + case VKD3DSIH_MOV: @@ -16009,7 +21629,7 @@ index 00000000000..5baefbc1f44 + size /= VKD3D_VEC4_SIZE * sizeof(uint32_t); + + vkd3d_string_buffer_printf(buffer, -+ "constant vkd3d_vec4 (&cb_%u)[%zu] [[id(%u)]];", cbv->register_id, size, binding->binding); ++ "constant vkd3d_vec4 
*cb_%u [[id(%u)]];", cbv->register_id, binding->binding); +}; + +static void msl_generate_descriptor_struct_declarations(struct msl_generator *gen) @@ -16298,7 +21918,7 @@ index 00000000000..5baefbc1f44 + case VKD3D_SHADER_SV_POSITION: + vkd3d_string_buffer_printf(buffer, " output.shader_out_%u", i); + msl_print_write_mask(buffer, e->mask); -+ vkd3d_string_buffer_printf(buffer, " = %s_out", gen->prefix); ++ vkd3d_string_buffer_printf(buffer, " = %s_out[%u]", gen->prefix, e->register_index); + msl_print_register_datatype(buffer, gen, vkd3d_data_type_from_component_type(e->component_type)); + msl_print_write_mask(buffer, e->mask); + break; @@ -16359,7 +21979,7 @@ index 00000000000..5baefbc1f44 + vkd3d_string_buffer_printf(gen->buffer, " return output;\n}\n"); +} + -+static void msl_generator_generate(struct msl_generator *gen) ++static int msl_generator_generate(struct msl_generator *gen, struct vkd3d_shader_code *out) +{ + const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; + unsigned int i; @@ -16368,6 +21988,10 @@ index 00000000000..5baefbc1f44 + + vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + ++ if (gen->program->global_flags) ++ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, ++ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)gen->program->global_flags); ++ + vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_vec4\n{\n"); + vkd3d_string_buffer_printf(gen->buffer, " uint4 u;\n"); + vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); @@ -16406,6 +22030,13 @@ index 00000000000..5baefbc1f44 + + if (TRACE_ON()) + vkd3d_string_buffer_trace(gen->buffer); ++ ++ if (gen->failed) ++ return VKD3D_ERROR_INVALID_SHADER; ++ ++ vkd3d_shader_code_from_string_buffer(out, gen->buffer); ++ ++ return VKD3D_OK; +} + +static void msl_generator_cleanup(struct msl_generator *gen) @@ -16444,7 +22075,8 @@ index 00000000000..5baefbc1f44 + +int msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context) +{ + struct msl_generator generator; + int ret; @@ -16452,15 +22084,14 @@ index 00000000000..5baefbc1f44 + if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) + return ret; + -+ VKD3D_ASSERT(program->normalised_io); -+ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + + if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) + return ret; -+ msl_generator_generate(&generator); ++ ret = msl_generator_generate(&generator, out); + msl_generator_cleanup(&generator); + -+ return VKD3D_ERROR_INVALID_SHADER; ++ return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h 
index 9806614a35b..a98c8ae3df5 100644 @@ -16706,7 +22337,7 @@ index 366e351e3b5..c6be17bd230 100644 vkd3d_free(macro); } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 49979ab2491..6a28e2cd68e 100644 +index 49979ab2491..81555e702ec 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -97,15 +97,37 @@ static enum vkd3d_result vkd3d_spirv_binary_to_text(const struct vkd3d_shader_co @@ -16754,6 +22385,15 @@ index 49979ab2491..6a28e2cd68e 100644 } else { +@@ -228,7 +250,7 @@ enum vkd3d_shader_input_sysval_semantic vkd3d_siv_from_sysval_indexed(enum vkd3d + #define VKD3D_SPIRV_VERSION_1_0 0x00010000 + #define VKD3D_SPIRV_VERSION_1_3 0x00010300 + #define VKD3D_SPIRV_GENERATOR_ID 18 +-#define VKD3D_SPIRV_GENERATOR_VERSION 13 ++#define VKD3D_SPIRV_GENERATOR_VERSION 14 + #define VKD3D_SPIRV_GENERATOR_MAGIC vkd3d_make_u32(VKD3D_SPIRV_GENERATOR_VERSION, VKD3D_SPIRV_GENERATOR_ID) + + struct vkd3d_spirv_stream @@ -277,6 +299,16 @@ static void vkd3d_spirv_stream_free(struct vkd3d_spirv_stream *stream) vkd3d_spirv_stream_clear(stream); } @@ -16793,7 +22433,7 @@ index 49979ab2491..6a28e2cd68e 100644 static uint32_t vkd3d_spirv_get_op_constant_composite(struct vkd3d_spirv_builder *builder, uint32_t result_type, const uint32_t *constituents, unsigned int constituent_count) { -@@ -1870,29 +1910,37 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build +@@ -1870,29 +1910,41 @@ static uint32_t vkd3d_spirv_build_op_glsl_std450_nclamp(struct vkd3d_spirv_build static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, enum vkd3d_shader_component_type component_type, unsigned int component_count) { @@ -16801,7 +22441,11 @@ index 49979ab2491..6a28e2cd68e 100644 + uint32_t scalar_id, type_id; + + VKD3D_ASSERT(component_type < VKD3D_SHADER_COMPONENT_TYPE_COUNT); -+ VKD3D_ASSERT(1 <= component_count && component_count <= VKD3D_VEC4_SIZE); ++ if 
(!component_count || component_count > VKD3D_VEC4_SIZE) ++ { ++ ERR("Invalid component count %u.\n", component_count); ++ return 0; ++ } + + if ((type_id = builder->numeric_type_ids[component_type][component_count - 1])) + return type_id; @@ -16838,7 +22482,7 @@ index 49979ab2491..6a28e2cd68e 100644 default: FIXME("Unhandled component type %#x.\n", component_type); return 0; -@@ -1902,46 +1950,21 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, +@@ -1902,46 +1954,21 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, { VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); @@ -16893,7 +22537,7 @@ index 49979ab2491..6a28e2cd68e 100644 } static void vkd3d_spirv_builder_init(struct vkd3d_spirv_builder *builder, const char *entry_point) -@@ -1996,9 +2019,7 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, +@@ -1996,9 +2023,7 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, { uint64_t capability_mask = builder->capability_mask; struct vkd3d_spirv_stream stream; @@ -16903,17 +22547,17 @@ index 49979ab2491..6a28e2cd68e 100644 vkd3d_spirv_stream_init(&stream); -@@ -2053,26 +2074,20 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, +@@ -2053,26 +2078,20 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, if (builder->invocation_count) vkd3d_spirv_build_op_execution_mode(&builder->execution_mode_stream, builder->main_function_id, SpvExecutionModeInvocations, &builder->invocation_count, 1); - vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream); - +- - vkd3d_spirv_stream_append(&stream, &builder->debug_stream); - vkd3d_spirv_stream_append(&stream, &builder->annotation_stream); - vkd3d_spirv_stream_append(&stream, &builder->global_stream); - vkd3d_spirv_stream_append(&stream, &builder->function_stream); -- + - if (!(code 
= vkd3d_calloc(stream.word_count, sizeof(*code)))) + if (!vkd3d_spirv_stream_append(&stream, &builder->execution_mode_stream) + || !vkd3d_spirv_stream_append(&stream, &builder->debug_stream) @@ -16936,7 +22580,46 @@ index 49979ab2491..6a28e2cd68e 100644 return true; } -@@ -2647,8 +2662,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p +@@ -2083,28 +2102,26 @@ static const struct vkd3d_spirv_resource_type + SpvDim dim; + uint32_t arrayed; + uint32_t ms; +- + unsigned int coordinate_component_count; +- unsigned int offset_component_count; + + SpvCapability capability; + SpvCapability uav_capability; + } + vkd3d_spirv_resource_type_table[] = + { +- {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, 0, ++ {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, + SpvCapabilitySampledBuffer, SpvCapabilityImageBuffer}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, 1, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, + SpvCapabilitySampled1D, SpvCapabilityImage1D}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2, 2}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2, 2}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3, 3}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 0}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, 1, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2}, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2}, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3}, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3}, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, + SpvCapabilitySampled1D, SpvCapabilityImage1D}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3, 2}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3, 2}, +- {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 0, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3}, ++ 
{VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3}, ++ {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, + SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray}, + }; + +@@ -2647,8 +2664,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { compiler->xfb_info = vkd3d_find_struct(compile_info->next, TRANSFORM_FEEDBACK_INFO); @@ -16945,7 +22628,7 @@ index 49979ab2491..6a28e2cd68e 100644 compiler->shader_interface = *shader_interface; if (shader_interface->push_constant_buffer_count) -@@ -2675,6 +2688,11 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p +@@ -2675,6 +2690,11 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p } } @@ -16957,7 +22640,32 @@ index 49979ab2491..6a28e2cd68e 100644 compiler->scan_descriptor_info = scan_descriptor_info; compiler->phase = VKD3DSIH_INVALID; -@@ -3252,18 +3270,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * +@@ -3174,6 +3194,14 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + case VKD3DSPR_CONSTBUFFER: + snprintf(buffer, buffer_size, "cb%u_%u", reg->idx[0].offset, reg->idx[1].offset); + break; ++ case VKD3DSPR_RASTOUT: ++ if (idx == VSIR_RASTOUT_POINT_SIZE) ++ { ++ snprintf(buffer, buffer_size, "oPts"); ++ break; ++ } ++ FIXME("Unhandled rastout register %#x.\n", idx); ++ return false; + case VKD3DSPR_INPUT: + snprintf(buffer, buffer_size, "v%u", idx); + break; +@@ -3234,6 +3262,9 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s + case VKD3DSPR_WAVELANEINDEX: + snprintf(buffer, buffer_size, "vWaveLaneIndex"); + break; ++ case VKD3DSPR_POINT_COORD: ++ snprintf(buffer, buffer_size, "vPointCoord"); ++ break; + default: + FIXME("Unhandled register %#x.\n", reg->type); + snprintf(buffer, buffer_size, "unrecognized_%#x", 
reg->type); +@@ -3252,18 +3283,6 @@ static void spirv_compiler_emit_register_debug_name(struct vkd3d_spirv_builder * vkd3d_spirv_build_op_name(builder, id, "%s", debug_name); } @@ -16976,7 +22684,7 @@ index 49979ab2491..6a28e2cd68e 100644 static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compiler, struct vkd3d_spirv_stream *stream, SpvStorageClass storage_class, enum vkd3d_shader_component_type component_type, unsigned int component_count, -@@ -3273,10 +3279,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil +@@ -3273,10 +3292,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil uint32_t type_id, length_id, ptr_type_id; unsigned int i; @@ -16987,7 +22695,7 @@ index 49979ab2491..6a28e2cd68e 100644 type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); for (i = 0; i < length_count; ++i) { -@@ -3290,6 +3292,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil +@@ -3290,6 +3305,14 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } @@ -17002,7 +22710,7 @@ index 49979ab2491..6a28e2cd68e 100644 static const struct vkd3d_spec_constant_info { enum vkd3d_shader_parameter_name name; -@@ -3316,8 +3326,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_ +@@ -3316,8 +3339,10 @@ static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_ return NULL; } @@ -17014,7 +22722,7 @@ index 49979ab2491..6a28e2cd68e 100644 if (!compiler->current_spec_constant_id) { unsigned int i, id = 0; -@@ -3327,28 +3339,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com +@@ -3327,28 +3352,52 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com const struct vkd3d_shader_parameter1 *current = 
&compiler->program->parameters[i]; if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) @@ -17074,7 +22782,7 @@ index 49979ab2491..6a28e2cd68e 100644 if (info) vkd3d_spirv_build_op_name(builder, id, "%s", info->debug_name); -@@ -3365,7 +3401,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile +@@ -3365,7 +3414,8 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile } static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, @@ -17084,7 +22792,7 @@ index 49979ab2491..6a28e2cd68e 100644 { unsigned int i; -@@ -3375,17 +3412,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler +@@ -3375,17 +3425,17 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler return compiler->spec_constants[i].id; } @@ -17105,7 +22813,7 @@ index 49979ab2491..6a28e2cd68e 100644 ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, compiler->spirv_parameter_info[index].buffer_id, -@@ -3393,48 +3430,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi +@@ -3393,48 +3443,49 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } @@ -17177,7 +22885,7 @@ index 49979ab2491..6a28e2cd68e 100644 } static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, -@@ -4210,7 +4248,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, +@@ -4210,7 +4261,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, else if (reg->type == VKD3DSPR_UNDEF) return spirv_compiler_emit_load_undef(compiler, reg, write_mask); else if (reg->type == VKD3DSPR_PARAMETER) @@ -17187,7 +22895,7 @@ index 49979ab2491..6a28e2cd68e 100644 
component_count = vsir_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -4500,9 +4539,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, +@@ -4500,9 +4552,24 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, uint32_t val_id) { @@ -17214,7 +22922,7 @@ index 49979ab2491..6a28e2cd68e 100644 spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, val_id); } -@@ -4809,6 +4863,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = +@@ -4809,6 +4876,10 @@ static const struct vkd3d_spirv_builtin vkd3d_pixel_shader_position_builtin = { VKD3D_SHADER_COMPONENT_FLOAT, 4, SpvBuiltInFragCoord, frag_coord_fixup, }; @@ -17225,7 +22933,16 @@ index 49979ab2491..6a28e2cd68e 100644 static const struct { enum vkd3d_shader_register_type reg_type; -@@ -5398,7 +5456,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, +@@ -4828,6 +4899,8 @@ vkd3d_register_builtins[] = + + {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, + ++ {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, ++ + {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, + +@@ -5398,7 +5471,11 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); VKD3D_ASSERT(reg->idx_count < 2); @@ -17238,7 +22955,7 @@ index 49979ab2491..6a28e2cd68e 100644 { FIXME("Unhandled register %#x.\n", reg->type); return; -@@ -5451,7 +5513,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5451,7 +5528,7 @@ static void spirv_compiler_emit_output(struct 
spirv_compiler *compiler, const struct shader_signature *shader_signature; const struct vkd3d_spirv_builtin *builtin; enum vkd3d_shader_sysval_semantic sysval; @@ -17247,7 +22964,7 @@ index 49979ab2491..6a28e2cd68e 100644 bool use_private_variable = false; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; -@@ -5502,7 +5564,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5502,7 +5579,6 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, use_private_variable = true; } @@ -17255,7 +22972,7 @@ index 49979ab2491..6a28e2cd68e 100644 vkd3d_symbol_make_io(®_symbol, reg_type, element_idx); if (rb_get(&compiler->symbol_table, ®_symbol)) -@@ -5580,7 +5641,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5580,7 +5656,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, @@ -17264,7 +22981,7 @@ index 49979ab2491..6a28e2cd68e 100644 reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); -@@ -5591,7 +5652,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, +@@ -5591,7 +5667,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, if (use_private_variable) { compiler->private_output_variable[element_idx] = var_id; @@ -17273,7 +22990,20 @@ index 49979ab2491..6a28e2cd68e 100644 if (!compiler->epilogue_function_id) compiler->epilogue_function_id = vkd3d_spirv_alloc_id(builder); } -@@ -6120,12 +6181,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, +@@ -5846,11 +5922,8 @@ static size_t spirv_compiler_get_current_function_location(struct spirv_compiler + return builder->main_function_location; + } + +-static void 
spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, enum vsir_global_flags flags) + { +- enum vkd3d_shader_global_flags flags = instruction->declaration.global_flags; +- + if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) + { + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0); +@@ -6120,12 +6193,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, @@ -17288,7 +23018,7 @@ index 49979ab2491..6a28e2cd68e 100644 uint32_t array_type_id, ptr_type_id, var_id; bool write_only = false, coherent = false; struct vkd3d_symbol symbol; -@@ -6135,12 +6196,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * +@@ -6135,12 +6208,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * resource_type, is_uav_counter, &binding_address); var_info->binding_base_idx = binding_address.binding_base_idx; @@ -17304,7 +23034,7 @@ index 49979ab2491..6a28e2cd68e 100644 } if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u -@@ -6194,11 +6254,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * +@@ -6194,11 +6266,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * } static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, @@ -17318,7 +23048,7 @@ index 49979ab2491..6a28e2cd68e 100644 struct vkd3d_push_constant_buffer_binding *push_cb; struct vkd3d_descriptor_variable_info var_info; struct 
vkd3d_shader_register reg; -@@ -6206,7 +6267,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, +@@ -6206,7 +6279,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, unsigned int size; vsir_register_init(®, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3); @@ -17327,7 +23057,7 @@ index 49979ab2491..6a28e2cd68e 100644 reg.idx[1].offset = range->first; reg.idx[2].offset = range->last; -@@ -6239,7 +6300,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, +@@ -6239,7 +6312,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, @@ -17336,7 +23066,7 @@ index 49979ab2491..6a28e2cd68e 100644 vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -6275,7 +6336,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi +@@ -6275,7 +6348,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi } static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, @@ -17345,7 +23075,7 @@ index 49979ab2491..6a28e2cd68e 100644 { const SpvStorageClass storage_class = SpvStorageClassUniformConstant; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -@@ -6285,7 +6346,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi +@@ -6285,7 +6358,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi uint32_t type_id, var_id; vsir_register_init(®, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); @@ -17354,7 +23084,7 @@ index 49979ab2491..6a28e2cd68e 100644 vkd3d_symbol_make_sampler(®_symbol, ®); reg_symbol.info.sampler.range = *range; -@@ -6295,8 +6356,8 @@ static void spirv_compiler_emit_sampler_declaration(struct 
spirv_compiler *compi +@@ -6295,8 +6368,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi return; type_id = vkd3d_spirv_get_op_type_sampler(builder); @@ -17365,7 +23095,7 @@ index 49979ab2491..6a28e2cd68e 100644 vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -6346,7 +6407,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty +@@ -6346,7 +6419,7 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, @@ -17374,7 +23104,7 @@ index 49979ab2491..6a28e2cd68e 100644 { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_shader_descriptor_info1 *d; -@@ -6369,7 +6430,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler +@@ -6369,7 +6442,7 @@ static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler sampled_type_id = vkd3d_spirv_get_type_id(builder, data_type, 1); return vkd3d_spirv_get_op_type_image(builder, sampled_type_id, resource_type_info->dim, @@ -17383,7 +23113,7 @@ index 49979ab2491..6a28e2cd68e 100644 reg->type == VKD3DSPR_UAV ? 
2 : 1, format); } -@@ -6384,18 +6445,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi +@@ -6384,18 +6457,14 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi const struct vkd3d_shader_combined_resource_sampler *current; uint32_t image_type_id, type_id, ptr_type_id, var_id; enum vkd3d_shader_binding_flag resource_type_flag; @@ -17402,7 +23132,7 @@ index 49979ab2491..6a28e2cd68e 100644 current = &shader_interface->combined_samplers[i]; if (current->resource_space != resource_range->space || current->resource_index != resource_range->first) -@@ -6417,16 +6474,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi +@@ -6417,16 +6486,8 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi current->sampler_space, current->binding.count); } @@ -17420,7 +23150,7 @@ index 49979ab2491..6a28e2cd68e 100644 type_id = vkd3d_spirv_get_op_type_sampled_image(builder, image_type_id); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, type_id); -@@ -6461,21 +6510,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi +@@ -6461,21 +6522,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi } static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, @@ -17449,7 +23179,7 @@ index 49979ab2491..6a28e2cd68e 100644 if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -@@ -6489,7 +6541,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6489,7 +6553,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp return; } @@ -17458,7 +23188,7 @@ index 49979ab2491..6a28e2cd68e 100644 if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) { -@@ -6517,19 +6569,15 @@ static void 
spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6517,19 +6581,15 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp else { type_id = spirv_compiler_get_image_type_id(compiler, ®, range, @@ -17482,7 +23212,7 @@ index 49979ab2491..6a28e2cd68e 100644 { if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, -@@ -6543,7 +6591,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6543,7 +6603,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp compiler->use_invocation_interlock = true; } @@ -17491,7 +23221,7 @@ index 49979ab2491..6a28e2cd68e 100644 { VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ -@@ -6571,7 +6619,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp +@@ -6571,7 +6631,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, @@ -17500,7 +23230,7 @@ index 49979ab2491..6a28e2cd68e 100644 } } -@@ -6709,7 +6757,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi +@@ -6709,7 +6769,8 @@ static void spirv_compiler_emit_dcl_input_primitive(struct spirv_compiler *compi static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) { @@ -17510,7 +23240,7 @@ index 49979ab2491..6a28e2cd68e 100644 /* Set the point size. Point sprites are not supported in d3d10+, but * point primitives can still be used with e.g. stream output. 
Vulkan -@@ -6723,7 +6772,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) +@@ -6723,7 +6784,8 @@ static void spirv_compiler_emit_point_size(struct spirv_compiler *compiler) || compiler->write_tess_geom_point_size) { vkd3d_spirv_build_op_store(&compiler->spirv_builder, @@ -17520,7 +23250,25 @@ index 49979ab2491..6a28e2cd68e 100644 spirv_compiler_get_constant_float(compiler, 1.0f), SpvMemoryAccessMaskNone); } } -@@ -6845,10 +6895,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler * +@@ -6760,15 +6822,11 @@ static void spirv_compiler_emit_dcl_gs_instances(struct spirv_compiler *compiler + compiler->spirv_builder.invocation_count = instruction->declaration.count; + } + +-static void spirv_compiler_emit_dcl_tessellator_domain(struct spirv_compiler *compiler, +- const struct vkd3d_shader_instruction *instruction) ++static void spirv_compiler_emit_tessellator_domain(struct spirv_compiler *compiler, ++ enum vkd3d_tessellator_domain domain) + { +- enum vkd3d_tessellator_domain domain = instruction->declaration.tessellator_domain; + SpvExecutionMode mode; + +- if (compiler->shader_type == VKD3D_SHADER_TYPE_HULL && spirv_compiler_is_opengl_target(compiler)) +- return; +- + switch (domain) + { + case VKD3D_TESSELLATOR_DOMAIN_LINE: +@@ -6845,10 +6903,9 @@ static void spirv_compiler_emit_tessellator_partitioning(struct spirv_compiler * spirv_compiler_emit_execution_mode(compiler, mode, NULL, 0); } @@ -17533,7 +23281,7 @@ index 49979ab2491..6a28e2cd68e 100644 const uint32_t local_size[] = {group_size->x, group_size->y, group_size->z}; spirv_compiler_emit_execution_mode(compiler, -@@ -7391,7 +7440,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, +@@ -7391,7 +7448,7 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, uint32_t components[VKD3D_VEC4_SIZE]; if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA @@ 
-17542,7 +23290,7 @@ index 49979ab2491..6a28e2cd68e 100644 goto general_implementation; spirv_compiler_get_register_info(compiler, &dst->reg, &dst_reg_info); -@@ -8433,11 +8482,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, +@@ -8433,11 +8490,10 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t sampler_var_id, sampler_id, sampled_image_type_id; const struct vkd3d_symbol *symbol = NULL; @@ -17555,7 +23303,7 @@ index 49979ab2491..6a28e2cd68e 100644 if (resource_reg->type == VKD3DSPR_RESOURCE) symbol = spirv_compiler_find_combined_sampler(compiler, resource_reg, sampler_reg); -@@ -8491,7 +8539,7 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, +@@ -8491,7 +8547,7 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, image->image_type_id = spirv_compiler_get_image_type_id(compiler, resource_reg, &symbol->info.resource.range, image->resource_type_info, @@ -17564,7 +23312,91 @@ index 49979ab2491..6a28e2cd68e 100644 if (sampled) { -@@ -9569,7 +9617,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co +@@ -8535,9 +8591,11 @@ static uint32_t spirv_compiler_emit_texel_offset(struct spirv_compiler *compiler + const struct vkd3d_shader_instruction *instruction, + const struct vkd3d_spirv_resource_type *resource_type_info) + { ++ unsigned int component_count = resource_type_info->coordinate_component_count - resource_type_info->arrayed; + const struct vkd3d_shader_texel_offset *offset = &instruction->texel_offset; +- unsigned int component_count = resource_type_info->offset_component_count; + int32_t data[4] = {offset->u, offset->v, offset->w, 0}; ++ ++ VKD3D_ASSERT(resource_type_info->dim != SpvDimCube); + return spirv_compiler_get_constant(compiler, + VKD3D_SHADER_COMPONENT_INT, component_count, (const uint32_t *)data); + } +@@ -8622,9 +8680,9 @@ static void 
spirv_compiler_emit_sample(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + const struct vkd3d_shader_src_param *resource, *sampler; ++ unsigned int image_operand_count = 0, component_count; + uint32_t sampled_type_id, coordinate_id, val_id; + SpvImageOperandsMask operands_mask = 0; +- unsigned int image_operand_count = 0; + struct vkd3d_shader_image image; + uint32_t image_operands[3]; + uint32_t coordinate_mask; +@@ -8649,7 +8707,8 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, + case VKD3DSIH_SAMPLE_GRAD: + op = SpvOpImageSampleExplicitLod; + operands_mask |= SpvImageOperandsGradMask; +- coordinate_mask = (1u << image.resource_type_info->offset_component_count) - 1; ++ component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed; ++ coordinate_mask = (1u << component_count) - 1; + image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, + &src[3], coordinate_mask); + image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, +@@ -8738,10 +8797,10 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED; ++ unsigned int component_count, component_idx; + SpvImageOperandsMask operands_mask = 0; + unsigned int image_operand_count = 0; + struct vkd3d_shader_image image; +- unsigned int component_idx; + uint32_t image_operands[1]; + uint32_t coordinate_mask; + bool extended_offset; +@@ -8763,10 +8822,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, + + if (offset) + { ++ component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed; ++ 
VKD3D_ASSERT(image.resource_type_info->dim != SpvDimCube); + vkd3d_spirv_enable_capability(builder, SpvCapabilityImageGatherExtended); + operands_mask |= SpvImageOperandsOffsetMask; + image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, +- offset, (1u << image.resource_type_info->offset_component_count) - 1); ++ offset, (1u << component_count) - 1); + } + else if (vkd3d_shader_instruction_has_texel_offset(instruction)) + { +@@ -8842,15 +8903,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler + uint32_t base_coordinate_id, component_idx; + uint32_t constituents[VKD3D_VEC4_SIZE]; + struct vkd3d_shader_image image; ++ bool storage_buffer_uav = false; + uint32_t indices[2]; + unsigned int i, j; + SpvOp op; + + resource = &src[instruction->src_count - 1]; +- resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); + +- if (resource->reg.type == VKD3DSPR_UAV +- && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) ++ if (resource->reg.type == VKD3DSPR_UAV) ++ { ++ resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); ++ storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource); ++ } ++ ++ if (storage_buffer_uav) + { + texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id); +@@ -9569,7 +9635,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co if (src->reg.type == VKD3DSPR_RASTERIZER) { val_id = spirv_compiler_emit_shader_parameter(compiler, @@ -17573,7 +23405,27 @@ index 49979ab2491..6a28e2cd68e 100644 } else { -@@ -10183,9 +10231,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -10132,9 +10198,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + + switch 
(instruction->opcode) + { +- case VKD3DSIH_DCL_GLOBAL_FLAGS: +- spirv_compiler_emit_dcl_global_flags(compiler, instruction); +- break; + case VKD3DSIH_DCL_INDEXABLE_TEMP: + spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); + break; +@@ -10172,9 +10235,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + spirv_compiler_emit_output_vertex_count(compiler, instruction); + break; +- case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: +- spirv_compiler_emit_dcl_tessellator_domain(compiler, instruction); +- break; + case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: + spirv_compiler_emit_tessellator_output_primitive(compiler, + instruction->declaration.tessellator_output_primitive); +@@ -10183,9 +10243,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, spirv_compiler_emit_tessellator_partitioning(compiler, instruction->declaration.tessellator_partitioning); break; @@ -17583,7 +23435,7 @@ index 49979ab2491..6a28e2cd68e 100644 case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: -@@ -10506,7 +10551,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, +@@ -10506,7 +10563,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_RESOURCE_STRUCTURED: case VKD3DSIH_DCL_UAV_RAW: case VKD3DSIH_DCL_UAV_STRUCTURED: @@ -17591,7 +23443,7 @@ index 49979ab2491..6a28e2cd68e 100644 case VKD3DSIH_HS_DECLS: case VKD3DSIH_NOP: /* nothing to do */ -@@ -10543,6 +10587,15 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) +@@ -10543,6 +10599,23 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) else spirv_compiler_emit_input(compiler, VKD3DSPR_PATCHCONST, i); } @@ -17603,11 +23455,19 @@ index 49979ab2491..6a28e2cd68e 100644 + vsir_dst_param_init(&dst, VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); + 
dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; + spirv_compiler_emit_output_register(compiler, &dst); ++ } ++ ++ if (compiler->program->has_point_coord) ++ { ++ struct vkd3d_shader_dst_param dst; ++ ++ vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); ++ spirv_compiler_emit_input_register(compiler, &dst); + } } static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) -@@ -10564,23 +10617,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c +@@ -10564,23 +10637,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c switch (descriptor->type) { case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: @@ -17634,7 +23494,7 @@ index 49979ab2491..6a28e2cd68e 100644 break; default: -@@ -10600,10 +10646,13 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10600,10 +10666,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct enum vkd3d_result result = VKD3D_OK; unsigned int i, max_element_count; @@ -17643,22 +23503,22 @@ index 49979ab2491..6a28e2cd68e 100644 compile_info, compiler->message_context)) < 0) return result; -+ VKD3D_ASSERT(program->normalised_io); -+ VKD3D_ASSERT(program->normalised_hull_cp_io); ++ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) return VKD3D_ERROR_OUT_OF_MEMORY; -@@ -10612,6 +10661,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10612,6 +10680,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct spirv_compiler_emit_temps(compiler, program->temp_count); if (program->ssa_count) spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); + if 
(compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) + spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); ++ spirv_compiler_emit_global_flags(compiler, program->global_flags); spirv_compiler_emit_descriptor_declarations(compiler); -@@ -10624,7 +10675,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct +@@ -10624,7 +10695,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct { uint32_t type_id, struct_id, ptr_type_id, var_id; @@ -17669,8 +23529,18 @@ index 49979ab2491..6a28e2cd68e 100644 struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); +@@ -10663,6 +10736,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + compiler->input_control_point_count = program->input_control_point_count; + compiler->output_control_point_count = program->output_control_point_count; + ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler)) ++ spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); ++ + if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) + spirv_compiler_emit_shader_signature_outputs(compiler); + diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 84f641cc316..848e78a34d3 100644 +index 84f641cc316..fcfe074e61e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -23,6 +23,7 @@ @@ -17729,15 +23599,18 @@ index 84f641cc316..848e78a34d3 100644 struct sm4_index_range { unsigned int index; -@@ -634,6 +676,7 @@ struct vkd3d_sm4_lookup_tables +@@ -632,8 +674,10 @@ struct sm4_index_range_array + struct vkd3d_sm4_lookup_tables + { const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; ++ const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT]; const struct 
vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; + const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; }; struct vkd3d_shader_sm4_parser -@@ -853,7 +896,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u +@@ -853,7 +897,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u >> VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; } @@ -17746,7 +23619,7 @@ index 84f641cc316..848e78a34d3 100644 shader_sm4_read_dst_param(priv, &tokens, end, reg_data_type, &semantic->resource.reg); shader_sm4_set_descriptor_register_range(priv, &semantic->resource.reg.reg, &semantic->resource.range); -@@ -873,7 +916,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u +@@ -873,7 +917,7 @@ static void shader_sm4_read_dcl_resource(struct vkd3d_shader_instruction *ins, u } } @@ -17755,7 +23628,7 @@ index 84f641cc316..848e78a34d3 100644 ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; shader_sm4_read_register_space(priv, &tokens, end, &semantic->resource.range.space); -@@ -915,7 +958,7 @@ static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, ui +@@ -915,7 +959,7 @@ static void shader_sm4_read_dcl_sampler(struct vkd3d_shader_instruction *ins, ui ins->flags = (opcode_token & VKD3D_SM4_SAMPLER_MODE_MASK) >> VKD3D_SM4_SAMPLER_MODE_SHIFT; if (ins->flags & ~VKD3D_SM4_SAMPLER_COMPARISON) FIXME("Unhandled sampler mode %#x.\n", ins->flags); @@ -17764,7 +23637,7 @@ index 84f641cc316..848e78a34d3 100644 shader_sm4_set_descriptor_register_range(priv, &ins->declaration.sampler.src.reg, &ins->declaration.sampler.range); shader_sm4_read_register_space(priv, &tokens, end, &ins->declaration.sampler.range.space); } -@@ -1115,7 +1158,18 @@ static void shader_sm4_read_dcl_input_ps(struct 
vkd3d_shader_instruction *ins, u +@@ -1115,7 +1159,18 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u struct signature_element *e = vsir_signature_find_element_for_reg( &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); @@ -17784,7 +23657,7 @@ index 84f641cc316..848e78a34d3 100644 } } -@@ -1130,7 +1184,18 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in +@@ -1130,7 +1185,18 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in struct signature_element *e = vsir_signature_find_element_for_reg( &priv->p.program->input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); @@ -17804,7 +23677,27 @@ index 84f641cc316..848e78a34d3 100644 } ins->declaration.register_semantic.sysval_semantic = *tokens; } -@@ -1224,11 +1289,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio +@@ -1147,9 +1213,10 @@ static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction * + } + + static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, +- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) ++ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) + { + ins->declaration.global_flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; ++ sm4->p.program->global_flags = ins->declaration.global_flags; + } + + static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, +@@ -1201,6 +1268,7 @@ static void shader_sm5_read_dcl_tessellator_domain(struct vkd3d_shader_instructi + { + ins->declaration.tessellator_domain = (opcode_token & VKD3D_SM5_TESSELLATOR_MASK) + >> VKD3D_SM5_TESSELLATOR_SHIFT; ++ priv->p.program->tess_domain = 
ins->declaration.tessellator_domain; + } + + static void shader_sm5_read_dcl_tessellator_partitioning(struct vkd3d_shader_instruction *ins, uint32_t opcode, +@@ -1224,11 +1292,14 @@ static void shader_sm5_read_dcl_hs_max_tessfactor(struct vkd3d_shader_instructio } static void shader_sm5_read_dcl_thread_group(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -17820,7 +23713,7 @@ index 84f641cc316..848e78a34d3 100644 } static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -@@ -1237,7 +1305,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui +@@ -1237,7 +1308,7 @@ static void shader_sm5_read_dcl_uav_raw(struct vkd3d_shader_instruction *ins, ui struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; const uint32_t *end = &tokens[token_count]; @@ -17829,7 +23722,7 @@ index 84f641cc316..848e78a34d3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); -@@ -1249,7 +1317,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction * +@@ -1249,7 +1320,7 @@ static void shader_sm5_read_dcl_uav_structured(struct vkd3d_shader_instruction * struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; const uint32_t *end = &tokens[token_count]; @@ -17838,7 +23731,7 @@ index 84f641cc316..848e78a34d3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); ins->flags = (opcode_token & VKD3D_SM5_UAV_FLAGS_MASK) >> VKD3D_SM5_UAV_FLAGS_SHIFT; resource->byte_stride = *tokens++; -@@ -1286,7 +1354,7 @@ static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct +@@ -1286,7 +1357,7 @@ static void 
shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruct struct vkd3d_shader_structured_resource *resource = &ins->declaration.structured_resource; const uint32_t *end = &tokens[token_count]; @@ -17847,7 +23740,7 @@ index 84f641cc316..848e78a34d3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); resource->byte_stride = *tokens++; if (resource->byte_stride % 4) -@@ -1300,7 +1368,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in +@@ -1300,7 +1371,7 @@ static void shader_sm5_read_dcl_resource_raw(struct vkd3d_shader_instruction *in struct vkd3d_shader_raw_resource *resource = &ins->declaration.raw_resource; const uint32_t *end = &tokens[token_count]; @@ -17856,18 +23749,18 @@ index 84f641cc316..848e78a34d3 100644 shader_sm4_set_descriptor_register_range(priv, &resource->resource.reg.reg, &resource->resource.range); shader_sm4_read_register_space(priv, &tokens, end, &resource->resource.range.space); } -@@ -1330,11 +1398,21 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = +@@ -1330,11 +1401,23 @@ static const enum vkd3d_shader_register_precision register_precision_table[] = /* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16, }; -struct tpf_writer +struct sm4_stat - { ++{ + uint32_t fields[VKD3D_STAT_COUNT]; +}; + +struct tpf_compiler -+{ + { + /* OBJECTIVE: We want to get rid of this HLSL IR specific field. 
*/ struct hlsl_ctx *ctx; - struct vkd3d_bytecode_buffer *buffer; @@ -17875,12 +23768,14 @@ index 84f641cc316..848e78a34d3 100644 struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; + ++ int result; ++ + struct vkd3d_bytecode_buffer *buffer; + struct dxbc_writer dxbc; }; static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) -@@ -1400,8 +1478,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1400,8 +1483,8 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_ISHR, VKD3DSIH_ISHR, "i", "ii"}, {VKD3D_SM4_OP_ITOF, VKD3DSIH_ITOF, "f", "i"}, {VKD3D_SM4_OP_LABEL, VKD3DSIH_LABEL, "", "O"}, @@ -17891,7 +23786,7 @@ index 84f641cc316..848e78a34d3 100644 {VKD3D_SM4_OP_LOG, VKD3DSIH_LOG, "f", "f"}, {VKD3D_SM4_OP_LOOP, VKD3DSIH_LOOP, "", ""}, {VKD3D_SM4_OP_LT, VKD3DSIH_LTO, "u", "ff"}, -@@ -1417,7 +1495,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1417,7 +1500,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_NOP, VKD3DSIH_NOP, "", ""}, {VKD3D_SM4_OP_NOT, VKD3DSIH_NOT, "u", "u"}, {VKD3D_SM4_OP_OR, VKD3DSIH_OR, "u", "uu"}, @@ -17900,7 +23795,7 @@ index 84f641cc316..848e78a34d3 100644 {VKD3D_SM4_OP_RET, VKD3DSIH_RET, "", ""}, {VKD3D_SM4_OP_RETC, VKD3DSIH_RETP, "", "u", shader_sm4_read_conditional_op}, -@@ -1426,12 +1504,12 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1426,12 +1509,12 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM4_OP_ROUND_PI, VKD3DSIH_ROUND_PI, "f", "f"}, {VKD3D_SM4_OP_ROUND_Z, VKD3DSIH_ROUND_Z, "f", "f"}, {VKD3D_SM4_OP_RSQ, VKD3DSIH_RSQ, "f", "f"}, @@ -17919,7 +23814,7 @@ index 84f641cc316..848e78a34d3 100644 {VKD3D_SM4_OP_SQRT, VKD3DSIH_SQRT, "f", "f"}, {VKD3D_SM4_OP_SWITCH, VKD3DSIH_SWITCH, "", "i"}, {VKD3D_SM4_OP_SINCOS, VKD3DSIH_SINCOS, "ff", "f"}, -@@ -1480,10 +1558,10 @@ static void 
init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1480,10 +1563,10 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) shader_sm4_read_dcl_indexable_temp}, {VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, VKD3DSIH_DCL_GLOBAL_FLAGS, "", "", shader_sm4_read_dcl_global_flags}, @@ -17934,7 +23829,7 @@ index 84f641cc316..848e78a34d3 100644 {VKD3D_SM5_OP_HS_DECLS, VKD3DSIH_HS_DECLS, "", ""}, {VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, VKD3DSIH_HS_CONTROL_POINT_PHASE, "", ""}, {VKD3D_SM5_OP_HS_FORK_PHASE, VKD3DSIH_HS_FORK_PHASE, "", ""}, -@@ -1492,14 +1570,14 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1492,14 +1575,14 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_OP_CUT_STREAM, VKD3DSIH_CUT_STREAM, "", "f"}, {VKD3D_SM5_OP_FCALL, VKD3DSIH_FCALL, "", "O", shader_sm5_read_fcall}, @@ -17953,7 +23848,7 @@ index 84f641cc316..848e78a34d3 100644 {VKD3D_SM5_OP_RCP, VKD3DSIH_RCP, "f", "f"}, {VKD3D_SM5_OP_F32TOF16, VKD3DSIH_F32TOF16, "u", "f"}, {VKD3D_SM5_OP_F16TOF32, VKD3DSIH_F16TOF32, "f", "u"}, -@@ -1551,33 +1629,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1551,33 +1634,33 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) shader_sm5_read_dcl_resource_raw}, {VKD3D_SM5_OP_DCL_RESOURCE_STRUCTURED, VKD3DSIH_DCL_RESOURCE_STRUCTURED, "", "", shader_sm5_read_dcl_resource_structured}, @@ -18014,7 +23909,7 @@ index 84f641cc316..848e78a34d3 100644 {VKD3D_SM5_OP_SYNC, VKD3DSIH_SYNC, "", "", shader_sm5_read_sync}, {VKD3D_SM5_OP_DADD, VKD3DSIH_DADD, "d", "dd"}, -@@ -1604,21 +1682,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1604,21 +1687,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_OP_DTOU, VKD3DSIH_DTOU, "u", "d"}, {VKD3D_SM5_OP_ITOD, VKD3DSIH_ITOD, "d", "i"}, {VKD3D_SM5_OP_UTOD, VKD3DSIH_UTOD, "d", "u"}, @@ -18049,7 +23944,16 @@ 
index 84f641cc316..848e78a34d3 100644 {VKD3D_SM5_OP_CHECK_ACCESS_FULLY_MAPPED, VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED, "u", "u"}, }; -@@ -1662,6 +1740,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1637,7 +1720,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID, VKD3D_SM4_SWIZZLE_NONE}, + {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL, VKD3D_SM4_SWIZZLE_INVALID}, +- {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_VEC4}, ++ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_SCALAR}, + {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM, VKD3D_SM4_SWIZZLE_VEC4}, + {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY, VKD3D_SM4_SWIZZLE_VEC4}, +@@ -1662,6 +1745,161 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) {VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4}, }; @@ -18211,7 +24115,15 @@ index 84f641cc316..848e78a34d3 100644 memset(lookup, 0, sizeof(*lookup)); for (i = 0; i < ARRAY_SIZE(opcode_table); ++i) -@@ -1678,13 +1911,13 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) +@@ -1669,6 +1907,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + const struct vkd3d_sm4_opcode_info *info = &opcode_table[i]; + + lookup->opcode_info_from_sm4[info->opcode] = info; ++ lookup->opcode_info_from_vsir[info->handler_idx] = info; + } + + for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) +@@ -1678,13 +1917,13 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) lookup->register_type_info_from_sm4[info->sm4_type] = info; lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info; } @@ -18231,7 +24143,32 @@ index 84f641cc316..848e78a34d3 100644 } static const struct vkd3d_sm4_opcode_info 
*get_info_from_sm4_opcode( -@@ -1721,6 +1954,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( +@@ -1695,6 +1934,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( + return lookup->opcode_info_from_sm4[sm4_opcode]; + } + ++static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode) ++{ ++ if (vsir_opcode >= VKD3DSIH_COUNT) ++ return NULL; ++ return lookup->opcode_info_from_vsir[vsir_opcode]; ++} ++ ++static unsigned int opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info) ++{ ++ return strnlen(info->dst_info, SM4_MAX_DST_COUNT); ++} ++ ++static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info) ++{ ++ return strnlen(info->src_info, SM4_MAX_SRC_COUNT); ++} ++ + static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) + { +@@ -1721,6 +1978,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( return register_type_info->default_src_swizzle_type; } @@ -18248,7 +24185,7 @@ index 84f641cc316..848e78a34d3 100644 static enum vkd3d_data_type map_data_type(char t) { switch (t) -@@ -1735,12 +1978,8 @@ static enum vkd3d_data_type map_data_type(char t) +@@ -1735,12 +2002,8 @@ static enum vkd3d_data_type map_data_type(char t) return VKD3D_DATA_UINT; case 'O': return VKD3D_DATA_OPAQUE; @@ -18263,17 +24200,37 @@ index 84f641cc316..848e78a34d3 100644 default: ERR("Invalid data type '%c'.\n", t); return VKD3D_DATA_FLOAT; -@@ -2553,7 +2792,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro +@@ -1973,7 +2236,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui + return true; + } + +-static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) ++bool 
shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) + { + switch (reg->type) + { +@@ -2411,8 +2674,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + ins->raw = false; + ins->structured = false; + ins->predicate = NULL; +- ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); +- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); ++ ins->dst_count = opcode_info_get_dst_count(opcode_info); ++ ins->src_count = opcode_info_get_src_count(opcode_info); + ins->src = src_params = vsir_program_get_src_params(program, ins->src_count); + if (!src_params && ins->src_count) + { +@@ -2553,7 +2816,8 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro version.minor = VKD3D_SM4_VERSION_MINOR(version_token); /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) + if (!vsir_program_init(program, compile_info, -+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) ++ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) return false; vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; -@@ -2670,6 +2910,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +@@ -2670,6 +2934,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) uninvert_used_masks(&program->patch_constant_signature); @@ -18295,7 +24252,7 @@ index 84f641cc316..848e78a34d3 100644 if (!shader_sm4_parser_validate_signature(&sm4, &program->input_signature, sm4.input_register_masks, "Input") || !shader_sm4_parser_validate_signature(&sm4, &program->output_signature, -@@ -2706,9 +2961,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +@@ -2706,12 +2985,8 @@ int 
tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con && !sm4.has_control_point_phase && !sm4.p.failed) shader_sm4_validate_default_phase_index_ranges(&sm4); @@ -18304,33 +24261,30 @@ index 84f641cc316..848e78a34d3 100644 - if (sm4.p.failed) { - WARN("Failed to parse shader.\n"); -@@ -2716,10 +2968,21 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con +- WARN("Failed to parse shader.\n"); + vsir_program_cleanup(program); return VKD3D_ERROR_INVALID_SHADER; } - -+ if ((ret = vkd3d_shader_parser_validate(&sm4.p, config_flags)) < 0) -+ { -+ WARN("Failed to validate shader after parsing, ret %d.\n", ret); -+ -+ if (TRACE_ON()) -+ vsir_program_trace(program); -+ -+ vsir_program_cleanup(program); -+ return ret; -+ } -+ +@@ -2719,24 +2994,10 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; } -static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block); -+static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); - - static bool type_is_integer(const struct hlsl_type *type) - { -@@ -2735,8 +2998,8 @@ static bool type_is_integer(const struct hlsl_type *type) - } - } +- +-static bool type_is_integer(const struct hlsl_type *type) +-{ +- switch (type->e.numeric.type) +- { +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- return true; +- +- default: +- return false; +- } +-} ++static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); -bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, enum vkd3d_shader_register_type *type, bool *has_idx) @@ -18339,7 +24293,20 @@ index 84f641cc316..848e78a34d3 100644 { unsigned int i; -@@ -2756,6 +3019,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem +@@ -2750,12 +3011,19 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx 
*ctx, const struct hlsl_sem + } + register_table[] = + { +- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, +- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, +- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, ++ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, ++ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, ++ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADINDEX, false}, ++ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, ++ ++ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, @@ -18349,7 +24316,7 @@ index 84f641cc316..848e78a34d3 100644 /* Put sv_target in this table, instead of letting it fall through to * default varying allocation, so that the register index matches the * usage index. 
*/ -@@ -2768,9 +3034,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem +@@ -2768,9 +3036,9 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem for (i = 0; i < ARRAY_SIZE(register_table); ++i) { @@ -18361,7 +24328,7 @@ index 84f641cc316..848e78a34d3 100644 { if (type) *type = register_table[i].type; -@@ -2782,8 +3048,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem +@@ -2782,8 +3050,57 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } @@ -18421,7 +24388,7 @@ index 84f641cc316..848e78a34d3 100644 { unsigned int i; -@@ -2792,7 +3107,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant +@@ -2792,54 +3109,104 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant const char *name; bool output; enum vkd3d_shader_type shader_type; @@ -18430,8 +24397,9 @@ index 84f641cc316..848e78a34d3 100644 } semantics[] = { -@@ -2800,46 +3115,79 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, ++ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, @@ -18463,6 +24431,12 @@ index 84f641cc316..848e78a34d3 100644 - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, - {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, - {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VIEWPORT_ARRAY_INDEX}, ++ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, ++ ++ {"sv_position", 
true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, ++ + {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, + {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, @@ -18527,6 +24501,16 @@ index 84f641cc316..848e78a34d3 100644 + } + return false; + } ++ } ++ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ { ++ if (!output) ++ { ++ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) ++ return get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) ++ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); ++ } + } for (i = 0; i < ARRAY_SIZE(semantics); ++i) @@ -18544,7 +24528,7 @@ index 84f641cc316..848e78a34d3 100644 return true; } } -@@ -2847,7 +3195,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant +@@ -2847,7 +3214,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant if (!needs_compat_mapping) return false; @@ -18553,14 +24537,27 @@ index 84f641cc316..848e78a34d3 100644 return true; } -@@ -2865,110 +3213,46 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, +@@ -2865,110 +3232,66 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, ctx->result = buffer->status; } -static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output) ++static int signature_element_pointer_compare(const void *x, const void *y) ++{ ++ const struct signature_element *e = *(const struct signature_element **)x; ++ const struct signature_element *f = *(const struct signature_element **)y; ++ int ret; ++ ++ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) ++ return ret; ++ return vkd3d_u32_compare(e->mask, f->mask); ++} ++ +static void tpf_write_signature(struct tpf_compiler 
*tpf, const struct shader_signature *signature, uint32_t tag) { -+ bool output = tag == TAG_OSGN || tag == TAG_PCSG; ++ bool output = tag == TAG_OSGN || (tag == TAG_PCSG ++ && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL); ++ const struct signature_element **sorted_elements; struct vkd3d_bytecode_buffer buffer = {0}; - struct vkd3d_string_buffer *string; - const struct hlsl_ir_var *var; @@ -18573,16 +24570,12 @@ index 84f641cc316..848e78a34d3 100644 put_u32(&buffer, 8); /* unknown */ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ for (i = 0; i < signature->element_count; ++i) - { +- { - unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; - uint32_t usage_idx, reg_idx; - D3D_NAME usage; - bool has_idx; -+ const struct signature_element *element = &signature->elements[i]; -+ enum vkd3d_shader_sysval_semantic sysval; -+ uint32_t used_mask = element->used_mask; - +- - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; - @@ -18591,7 +24584,12 @@ index 84f641cc316..848e78a34d3 100644 - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; -- ++ if (!(sorted_elements = vkd3d_calloc(signature->element_count, sizeof(*sorted_elements)))) ++ return; ++ for (i = 0; i < signature->element_count; ++i) ++ sorted_elements[i] = &signature->elements[i]; ++ qsort(sorted_elements, signature->element_count, sizeof(*sorted_elements), signature_element_pointer_compare); + - if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, NULL, &has_idx)) - { - reg_idx = has_idx ? 
var->semantic.index : ~0u; @@ -18601,7 +24599,12 @@ index 84f641cc316..848e78a34d3 100644 - VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } -- ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ const struct signature_element *element = sorted_elements[i]; ++ enum vkd3d_shader_sysval_semantic sysval; ++ uint32_t used_mask = element->used_mask; + - use_mask = width; /* FIXME: accurately report use mask */ if (output) - use_mask = 0xf ^ use_mask; @@ -18654,17 +24657,17 @@ index 84f641cc316..848e78a34d3 100644 + for (i = 0; i < signature->element_count; ++i) { - const char *semantic = var->semantic.name; -+ const struct signature_element *element = &signature->elements[i]; ++ const struct signature_element *element = sorted_elements[i]; size_t string_offset; - D3D_NAME usage; - - if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) - continue; -- + - hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - if (usage == ~0u) - continue; - +- - if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) - string_offset = put_string(&buffer, "SV_Target"); - else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) @@ -18682,10 +24685,11 @@ index 84f641cc316..848e78a34d3 100644 - - add_section(ctx, dxbc, output ? 
TAG_OSGN : TAG_ISGN, &buffer); + add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); ++ vkd3d_free(sorted_elements); } static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -@@ -2990,6 +3274,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) +@@ -2990,6 +3313,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: @@ -18693,14 +24697,25 @@ index 84f641cc316..848e78a34d3 100644 case HLSL_CLASS_STRUCT: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: -@@ -3123,24 +3408,24 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) +@@ -3008,6 +3332,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STREAM_OUTPUT: + case HLSL_CLASS_NULL: + break; + } +@@ -3123,24 +3448,30 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) vkd3d_unreachable(); } -static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) +static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) { - switch (type->e.resource.format->e.numeric.type) +- switch (type->e.resource.format->e.numeric.type) ++ const struct hlsl_type *format = type->e.resource.format; ++ ++ switch (format->e.numeric.type) { case HLSL_TYPE_DOUBLE: - return D3D_RETURN_TYPE_DOUBLE; @@ -18709,6 +24724,10 @@ index 84f641cc316..848e78a34d3 100644 case HLSL_TYPE_FLOAT: case HLSL_TYPE_HALF: - return D3D_RETURN_TYPE_FLOAT; ++ if (format->modifiers & HLSL_MODIFIER_UNORM) ++ return VKD3D_SM4_DATA_UNORM; ++ if (format->modifiers & HLSL_MODIFIER_SNORM) ++ return VKD3D_SM4_DATA_SNORM; + return VKD3D_SM4_DATA_FLOAT; case HLSL_TYPE_INT: @@ -18723,7 +24742,15 @@ index 84f641cc316..848e78a34d3 100644 default: vkd3d_unreachable(); -@@ -3398,6 +3683,48 
@@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un +@@ -3170,6 +3501,7 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ + case HLSL_SAMPLER_DIM_CUBEARRAY: + return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return D3D_SRV_DIMENSION_BUFFER; + default: +@@ -3398,6 +3730,48 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un return extern_resources; } @@ -18772,7 +24799,7 @@ index 84f641cc316..848e78a34d3 100644 static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); -@@ -3471,7 +3798,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3471,7 +3845,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { unsigned int dimx = resource->component_type->e.resource.format->dimx; @@ -18781,7 +24808,7 @@ index 84f641cc316..848e78a34d3 100644 put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; -@@ -3552,7 +3879,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3552,7 +3926,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { uint32_t flags = 0; @@ -18790,7 +24817,7 @@ index 84f641cc316..848e78a34d3 100644 flags |= D3D_SVF_USED; put_u32(&buffer, 0); /* name */ -@@ -3598,7 +3925,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3598,7 +3972,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) for (k = 0; k < comp_count; ++k) { struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); @@ -18799,7 +24826,7 @@ index 
84f641cc316..848e78a34d3 100644 enum hlsl_regset regset; if (comp_type->class == HLSL_CLASS_STRING) -@@ -3608,7 +3935,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -3608,7 +3982,8 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) continue; } @@ -18809,7 +24836,15 @@ index 84f641cc316..848e78a34d3 100644 if (regset == HLSL_REGSET_NUMERIC) { if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -@@ -3779,11 +4107,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s +@@ -3655,6 +4030,7 @@ static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_typ + case HLSL_SAMPLER_DIM_CUBEARRAY: + return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; + case HLSL_SAMPLER_DIM_BUFFER: ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + return VKD3D_SM4_RESOURCE_BUFFER; + default: +@@ -3779,11 +4155,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask); } @@ -18825,7 +24860,7 @@ index 84f641cc316..848e78a34d3 100644 if (var->is_uniform) { -@@ -3793,7 +4123,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3793,7 +4171,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_RESOURCE; reg->dimension = VSIR_DIMENSION_VEC4; @@ -18834,7 +24869,7 @@ index 84f641cc316..848e78a34d3 100644 { reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -@@ -3812,7 +4142,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3812,7 +4190,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_UAV; reg->dimension = VSIR_DIMENSION_VEC4; @@ -18843,7 +24878,7 @@ index 
84f641cc316..848e78a34d3 100644 { reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -@@ -3831,7 +4161,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3831,7 +4209,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_SAMPLER; reg->dimension = VSIR_DIMENSION_NONE; @@ -18852,7 +24887,7 @@ index 84f641cc316..848e78a34d3 100644 { reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -@@ -3853,7 +4183,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3853,7 +4231,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); reg->type = VKD3DSPR_CONSTBUFFER; reg->dimension = VSIR_DIMENSION_VEC4; @@ -18861,7 +24896,7 @@ index 84f641cc316..848e78a34d3 100644 { reg->idx[0].offset = var->buffer->reg.id; reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -@@ -3873,7 +4203,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3873,7 +4251,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { bool has_idx; @@ -18870,7 +24905,7 @@ index 84f641cc316..848e78a34d3 100644 { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -@@ -3883,7 +4213,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3883,7 +4261,10 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re reg->idx_count = 1; } @@ -18882,7 +24917,20 @@ index 84f641cc316..848e78a34d3 100644 *writemask = ((1u << data_type->dimx) - 1) << (offset % 4); } else -@@ -3902,7 +4235,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3891,7 +4272,11 
@@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + + VKD3D_ASSERT(hlsl_reg.allocated); +- reg->type = VKD3DSPR_INPUT; ++ ++ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ reg->type = VKD3DSPR_PATCHCONST; ++ else ++ reg->type = VKD3DSPR_INPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; + reg->idx_count = 1; +@@ -3902,7 +4287,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { bool has_idx; @@ -18891,7 +24939,7 @@ index 84f641cc316..848e78a34d3 100644 { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); -@@ -3912,7 +4245,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3912,7 +4297,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re reg->idx_count = 1; } @@ -18900,7 +24948,7 @@ index 84f641cc316..848e78a34d3 100644 reg->dimension = VSIR_DIMENSION_SCALAR; else reg->dimension = VSIR_DIMENSION_VEC4; -@@ -3938,13 +4271,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re +@@ -3938,13 +4323,13 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re } } @@ -18916,7 +24964,7 @@ index 84f641cc316..848e78a34d3 100644 if (vkd3d_sm4_get_default_swizzle_type(&tpf->lookup, src->reg.type) == VKD3D_SM4_SWIZZLE_VEC4) { hlsl_swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask); -@@ -3982,7 +4315,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, +@@ -3982,7 +4367,7 @@ static void sm4_src_from_constant_value(struct vkd3d_shader_src_param *src, } } @@ -18925,7 +24973,7 @@ index 84f641cc316..848e78a34d3 100644 const struct hlsl_ir_node *instr, uint32_t map_writemask) { unsigned int hlsl_swizzle; -@@ -4018,7 +4351,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ +@@ -4018,7 
+4403,7 @@ static unsigned int sm4_get_index_addressing_from_reg(const struct vkd3d_shader_ return 0; } @@ -18934,7 +24982,7 @@ index 84f641cc316..848e78a34d3 100644 enum vkd3d_sm4_swizzle_type sm4_swizzle_type, uint32_t sm4_swizzle) { const struct vkd3d_sm4_register_type_info *register_type_info; -@@ -4078,7 +4411,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v +@@ -4078,7 +4463,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v return token; } @@ -18943,7 +24991,7 @@ index 84f641cc316..848e78a34d3 100644 unsigned int j) { unsigned int addressing = sm4_get_index_addressing_from_reg(reg, j); -@@ -4108,7 +4441,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct +@@ -4108,7 +4493,7 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct } } @@ -18952,7 +25000,7 @@ index 84f641cc316..848e78a34d3 100644 { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = 0; -@@ -4121,7 +4454,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk +@@ -4121,7 +4506,7 @@ static void sm4_write_dst_register(const struct tpf_writer *tpf, const struct vk sm4_write_register_index(tpf, &dst->reg, j); } @@ -18961,7 +25009,7 @@ index 84f641cc316..848e78a34d3 100644 { struct vkd3d_bytecode_buffer *buffer = tpf->buffer; uint32_t token = 0, mod_token = 0; -@@ -4182,10 +4515,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk +@@ -4182,10 +4567,55 @@ static void sm4_write_src_register(const struct tpf_writer *tpf, const struct vk } } @@ -19019,7 +25067,7 @@ index 84f641cc316..848e78a34d3 100644 unsigned int size, i, j; size_t token_position; -@@ -4218,6 +4596,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 +@@ -4218,6 +4648,8 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4 size = (bytecode_get_size(buffer) - 
token_position) / sizeof(uint32_t); token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT); set_u32(buffer, token_position, token); @@ -19028,7 +25076,7 @@ index 84f641cc316..848e78a34d3 100644 } static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, -@@ -4247,7 +4627,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, +@@ -4247,7 +4679,7 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, return true; } @@ -19037,7 +25085,7 @@ index 84f641cc316..848e78a34d3 100644 { size_t size = (cbuffer->used_size + 3) / 4; -@@ -4282,7 +4662,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st +@@ -4282,7 +4714,7 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st write_sm4_instruction(tpf, &instr); } @@ -19046,7 +25094,7 @@ index 84f641cc316..848e78a34d3 100644 { unsigned int i; struct sm4_instruction instr = -@@ -4323,9 +4703,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex +@@ -4323,9 +4755,10 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex } } @@ -19058,7 +25106,7 @@ index 84f641cc316..848e78a34d3 100644 enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; struct hlsl_type *component_type; struct sm4_instruction instr; -@@ -4348,21 +4729,21 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex +@@ -4348,21 +4781,21 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex .dsts[0].reg.idx_count = 1, .dst_count = 1, @@ -19085,98 +25133,171 @@ index 84f641cc316..848e78a34d3 100644 { VKD3D_ASSERT(!i); instr.dsts[0].reg.idx[0].offset = resource->id; -@@ -4408,11 +4789,12 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex +@@ -4387,6 +4820,9 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; + instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW; ++ break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; + break; +@@ -4397,7 +4833,15 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + } + else + { +- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; ++ switch (component_type->sampler_dim) ++ { ++ case HLSL_SAMPLER_DIM_RAW_BUFFER: ++ instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; ++ break; ++ default: ++ instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; ++ break; ++ } + } + instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + +@@ -4408,305 +4852,189 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex } } -static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hlsl_ir_var *var) -+static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, -+ const struct hlsl_ir_var *var, bool is_patch_constant_func) ++static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) { - const struct hlsl_profile_info *profile = tpf->ctx->profile; -+ const struct 
vkd3d_shader_version *version = &tpf->program->shader_version; - const bool output = var->is_output_semantic; +- const bool output = var->is_output_semantic; - D3D_NAME usage; -+ enum vkd3d_shader_sysval_semantic semantic; - bool has_idx; - +- bool has_idx; +- struct sm4_instruction instr = -@@ -4421,7 +4803,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - .dst_count = 1, + { +- .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4, +- .dst_count = 1, ++ .opcode = VKD3D_SM4_OP_DCL_TEMPS, ++ ++ .idx = {count}, ++ .idx_count = 1, }; - if (hlsl_sm4_register_from_semantic(tpf->ctx, &var->semantic, output, &instr.dsts[0].reg.type, &has_idx)) -+ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) +- { +- if (has_idx) +- { +- instr.dsts[0].reg.idx[0].offset = var->semantic.index; +- instr.dsts[0].reg.idx_count = 1; +- } +- else +- { +- instr.dsts[0].reg.idx_count = 0; +- } +- instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1; +- } +- else ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) ++{ ++ struct sm4_instruction instr = { - if (has_idx) - { -@@ -4442,36 +4824,39 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } +- instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; +- instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; +- instr.dsts[0].reg.idx_count = 1; +- instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; +- } ++ .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, - if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) -+ if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) - instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; +- instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; ++ .idx = {temp->register_idx, temp->register_size, temp->component_count}, ++ .idx_count = 3, ++ }; - hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); - if (usage == ~0u) - usage = D3D_NAME_UNDEFINED; -+ sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, -+ tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); -+ if (semantic == ~0u) -+ semantic = VKD3D_SHADER_SV_NONE; ++ write_sm4_instruction(tpf, &instr); ++} - if (var->is_input_semantic) +- if (var->is_input_semantic) ++static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags) ++{ ++ struct sm4_instruction instr = { - switch (usage) -+ switch (semantic) - { +- { - case D3D_NAME_UNDEFINED: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ case VKD3D_SHADER_SV_NONE: -+ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; - break; +- ? 
VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; +- break; ++ .opcode = opcode, - case D3D_NAME_INSTANCE_ID: - case D3D_NAME_PRIMITIVE_ID: - case D3D_NAME_VERTEX_ID: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ case VKD3D_SHADER_SV_INSTANCE_ID: -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; - break; +- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; +- break; ++ .dsts[0] = *dst, ++ .dst_count = 1, - default: +- default: - instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) - ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; - break; - } - +- ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; +- break; +- } +- - if (profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) - { - enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; - -@@ -4510,32 +4895,32 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl - } - else - { +- { +- enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; +- +- if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) +- { +- mode = VKD3DSIM_CONSTANT; +- } +- else +- { +- static const struct +- { +- unsigned int modifiers; +- enum vkd3d_shader_interpolation_mode mode; +- } +- modes[] = +- { +- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID }, +- { HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE }, +- { HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID }, +- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID }, +- }; +- unsigned int i; +- +- for (i = 0; i < ARRAY_SIZE(modes); ++i) +- { +- if 
((var->storage_modifiers & modes[i].modifiers) == modes[i].modifiers) +- { +- mode = modes[i].mode; +- break; +- } +- } +- } +- +- instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; +- } +- } +- else +- { - if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; - else - instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; - } - +- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; +- else +- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; +- } +- - switch (usage) -+ if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) - { +- { - case D3D_NAME_COVERAGE: - case D3D_NAME_DEPTH: - case D3D_NAME_DEPTH_GREATER_EQUAL: @@ -19189,125 +25310,207 @@ index 84f641cc316..848e78a34d3 100644 - instr.idx_count = 1; - instr.idx[0] = usage; - break; -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET -+ || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); -+ } -+ else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) -+ { -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); -+ } -+ else -+ { -+ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); -+ instr.idx_count = 1; -+ instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); - } +- } ++ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, ++ }; write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_count) -+static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) ++static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, ++ const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags) { struct sm4_instruction instr = { -@@ -4548,7 +4933,7 @@ static void write_sm4_dcl_temps(const struct tpf_writer *tpf, uint32_t temp_coun +- .opcode = VKD3D_SM4_OP_DCL_TEMPS, 
++ .opcode = opcode, + +- .idx = {temp_count}, ++ .dsts[0] = semantic->reg, ++ .dst_count = 1, ++ ++ .idx[0] = semantic->sysval_semantic, + .idx_count = 1, ++ ++ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, + }; + write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t idx, -+static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, - uint32_t size, uint32_t comp_count) +- uint32_t size, uint32_t comp_count) ++static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size) { struct sm4_instruction instr = -@@ -4562,7 +4947,7 @@ static void write_sm4_dcl_indexable_temp(const struct tpf_writer *tpf, uint32_t + { +- .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, ++ .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, + +- .idx = {idx, size, comp_count}, ++ .idx = {group_size->x, group_size->y, group_size->z}, + .idx_count = 3, + }; + write_sm4_instruction(tpf, &instr); } -static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint32_t thread_count[3]) -+static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) ++static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) { struct sm4_instruction instr = { -@@ -4577,7 +4962,105 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3 +- .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, +- +- .idx[0] = thread_count[0], +- .idx[1] = thread_count[1], +- .idx[2] = thread_count[2], +- .idx_count = 3, ++ .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, ++ .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, + }; + write_sm4_instruction(tpf, &instr); } -static void write_sm4_ret(const struct tpf_writer *tpf) -+static void write_sm4_dcl_global_flags(const struct tpf_compiler *tpf, uint32_t flags) -+{ -+ struct sm4_instruction instr = -+ { -+ .opcode = 
VKD3D_SM4_OP_DCL_GLOBAL_FLAGS, -+ .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ +static void tpf_write_hs_decls(const struct tpf_compiler *tpf) -+{ -+ struct sm4_instruction instr = -+ { + { + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM4_OP_RET, + .opcode = VKD3D_SM5_OP_HS_DECLS, -+ }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + }; + + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) +static void tpf_write_hs_control_point_phase(const struct tpf_compiler *tpf) -+{ + { +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask); +- instr.srcs[0].modifiers = src_mod; +- instr.src_count = 1; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_CONTROL_POINT_PHASE, + }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) +static void tpf_write_hs_fork_phase(const struct tpf_compiler *tpf) -+{ + { +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); +- sm4_dst_from_node(&instr.dsts[dst_idx], dst); +- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; +- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; +- instr.dsts[1 - dst_idx].reg.idx_count = 0; +- instr.dst_count = 2; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask); +- 
instr.src_count = 1; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_HS_FORK_PHASE, + }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +static void tpf_write_dcl_input_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) -+{ + { +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); +- instr.src_count = 2; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_INPUT_CONTROL_POINT_COUNT, + .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, + }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + + write_sm4_instruction(tpf, &instr); + } + +-/* dp# instructions don't map the swizzle. 
*/ +-static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +static void tpf_write_dcl_output_control_point_count(const struct tpf_compiler *tpf, const uint32_t count) -+{ + { +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_OUTPUT_CONTROL_POINT_COUNT, + .extra_bits = count << VKD3D_SM5_CONTROL_POINT_COUNT_SHIFT, + }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, +- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, +- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) +static void tpf_write_dcl_tessellator_domain(const struct tpf_compiler *tpf, enum vkd3d_tessellator_domain domain) -+{ + { +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_DOMAIN, + .extra_bits = domain << VKD3D_SM5_TESSELLATOR_SHIFT, + }; -+ + +- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); +- sm4_dst_from_node(&instr.dsts[dst_idx], dst); +- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; +- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; +- instr.dsts[1 - dst_idx].reg.idx_count = 0; +- instr.dst_count = 2; + write_sm4_instruction(tpf, &instr); +} -+ + +- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, 
instr.dsts[dst_idx].write_mask); +- instr.src_count = 2; +static void tpf_write_dcl_tessellator_partitioning(const struct tpf_compiler *tpf, + enum vkd3d_shader_tessellator_partitioning partitioning) +{ @@ -19316,81 +25519,42 @@ index 84f641cc316..848e78a34d3 100644 + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_PARTITIONING, + .extra_bits = partitioning << VKD3D_SM5_TESSELLATOR_SHIFT, + }; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ + + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, +- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, +- const struct hlsl_ir_node *src3) +static void tpf_write_dcl_tessellator_output_primitive(const struct tpf_compiler *tpf, + enum vkd3d_shader_tessellator_output_primitive output_primitive) -+{ + { +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = opcode; + struct sm4_instruction instr = + { + .opcode = VKD3D_SM5_OP_DCL_TESSELLATOR_OUTPUT_PRIMITIVE, + .extra_bits = output_primitive << VKD3D_SM5_TESSELLATOR_SHIFT, + }; -+ + +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; + write_sm4_instruction(tpf, &instr); +} -+ + +- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask); +- instr.src_count = 3; +static void write_sm4_ret(const struct tpf_compiler *tpf) - { - struct sm4_instruction instr = - { -@@ -4587,7 +5070,7 @@ static void write_sm4_ret(const struct tpf_writer *tpf) - write_sm4_instruction(tpf, &instr); - } ++{ ++ struct sm4_instruction instr = ++ { ++ .opcode = VKD3D_SM4_OP_RET, ++ }; --static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, 
- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) - { - struct sm4_instruction instr; -@@ -4605,7 +5088,7 @@ static void write_sm4_unary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opco - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) - { - struct sm4_instruction instr; -@@ -4626,7 +5109,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -4645,7 +5128,7 @@ static void write_sm4_binary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opc - } - - /* dp# instructions don't map the swizzle. 
*/ --static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { - struct sm4_instruction instr; -@@ -4663,7 +5146,7 @@ static void write_sm4_binary_op_dot(const struct tpf_writer *tpf, enum vkd3d_sm4 - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *tpf, -+static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, - enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, - const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) - { -@@ -4686,7 +5169,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_opcode opcode, -+static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, - const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, - const struct hlsl_ir_node *src3) - { -@@ -4706,7 +5189,7 @@ static void write_sm4_ternary_op(const struct tpf_writer *tpf, enum vkd3d_sm4_op write_sm4_instruction(tpf, &instr); } @@ -19399,15 +25563,24 @@ index 84f641cc316..848e78a34d3 100644 const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, enum hlsl_sampler_dim dim) -@@ -4715,6 +5198,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node +@@ -4715,12 +5043,16 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node bool multisampled = resource_type->class == HLSL_CLASS_TEXTURE && (resource_type->sampler_dim == 
HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); + const struct vkd3d_shader_version *version = &tpf->program->shader_version; ++ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; struct sm4_instruction instr; -@@ -4769,7 +5253,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node + memset(&instr, 0, sizeof(instr)); + if (uav) + instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; ++ else if (raw) ++ instr.opcode = VKD3D_SM5_OP_LD_RAW; + else + instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; + +@@ -4769,7 +5101,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node reg->dimension = VSIR_DIMENSION_SCALAR; reg->u.immconst_u32[0] = index->value.u[0].u; } @@ -19416,7 +25589,7 @@ index 84f641cc316..848e78a34d3 100644 { hlsl_error(tpf->ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index."); } -@@ -4784,7 +5268,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node +@@ -4784,7 +5116,7 @@ static void write_sm4_ld(const struct tpf_writer *tpf, const struct hlsl_ir_node write_sm4_instruction(tpf, &instr); } @@ -19425,132 +25598,861 @@ index 84f641cc316..848e78a34d3 100644 { const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *coords = load->coords.node; -@@ -4864,7 +5348,7 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = 
&load->node; -@@ -4886,7 +5370,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) -+static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) - { - const struct hlsl_deref *resource = &load->resource; - const struct hlsl_ir_node *dst = &load->node; -@@ -4921,7 +5405,7 @@ static bool type_is_float(const struct hlsl_type *type) - return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; - } - --static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, -+static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, - const struct hlsl_ir_node *arg, uint32_t mask) - { - struct sm4_instruction instr; -@@ -4941,7 +5425,7 @@ static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) -+static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) - { - static const union - { -@@ -5050,7 +5534,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - } - --static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, -+static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref *dst, - const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) - { - struct sm4_instruction instr; -@@ -5058,7 +5542,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; - -- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, 
&instr.dsts[0].write_mask, dst, &instr); -+ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); - instr.dst_count = 1; - - sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -@@ -5068,7 +5552,7 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) -+static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) - { - struct sm4_instruction instr; - -@@ -5087,8 +5571,9 @@ static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, cons - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) -+static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_ir_node *arg2 = expr->operands[1].node; - const struct hlsl_ir_node *arg3 = expr->operands[2].node; -@@ -5103,7 +5588,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - switch (expr->op) - { - case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -- if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) - write_sm4_rasterizer_sample_count(tpf, &expr->node); - else - hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -@@ -5224,7 +5709,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - case HLSL_TYPE_FLOAT: - /* SM5 comes with a RCP opcode */ -- if (tpf->ctx->profile->major_version >= 5) -+ if (vkd3d_shader_ver_ge(version, 5, 0)) - { - 
write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); - } -@@ -5578,6 +6063,23 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); +@@ -4821,775 +5153,107 @@ static void write_sm4_sample(const struct tpf_writer *tpf, const struct hlsl_ir_ break; -+ case HLSL_OP3_MAD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3); -+ break; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3); -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -+ } -+ break; -+ default: - hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); - } -@@ -5585,7 +6087,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - hlsl_release_string_buffer(tpf->ctx, dst_type_string); +- vkd3d_unreachable(); +- } +- +- if (texel_offset) +- { +- if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) +- { +- hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, +- "Offset must resolve to integer literal in the range -8 to 7."); +- return; +- } +- } +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); +- instr.src_count = 3; +- +- if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD +- || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); +- ++instr.src_count; +- } +- else if (load->load_type == 
HLSL_RESOURCE_SAMPLE_GRAD) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); +- instr.src_count += 2; +- } +- else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP +- || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) +- { +- sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); +- ++instr.src_count; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) +- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir_resource_load *load) +-{ +- const struct hlsl_deref *resource = &load->resource; +- const struct hlsl_ir_node *dst = &load->node; +- struct sm4_instruction instr; +- +- if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER +- || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- { +- hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); +- return; +- } +- +- VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = 
VKD3D_SM4_OP_RESINFO; +- if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) +- instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static bool type_is_float(const struct hlsl_type *type) +-{ +- return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; +-} +- +-static void write_sm4_cast_from_bool(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr, +- const struct hlsl_ir_node *arg, uint32_t mask) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_AND; +- +- sm4_dst_from_node(&instr.dsts[0], &expr->node); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask); +- instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; +- instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR; +- instr.srcs[1].reg.u.immconst_u32[0] = mask; +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +-{ +- static const union +- { +- uint32_t u; +- float f; +- } one = { .f = 1.0 }; +- const struct hlsl_ir_node *arg1 = expr->operands[0].node; +- const struct hlsl_type *dst_type = expr->node.data_type; +- const struct hlsl_type *src_type = arg1->data_type; +- +- /* Narrowing casts were already lowered. 
*/ +- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); +- +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(tpf, expr, arg1, one.u); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_TYPE_INT: +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(tpf, expr, arg1, 1); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_TYPE_UINT: +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_HALF: +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_TYPE_BOOL: +- write_sm4_cast_from_bool(tpf, expr, arg1, 1); +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); +- break; +- +- default: +- vkd3d_unreachable(); +- } +- break; +- +- case HLSL_TYPE_DOUBLE: +- hlsl_fixme(tpf->ctx, 
&expr->node.loc, "SM4 cast to double."); +- break; +- +- case HLSL_TYPE_BOOL: +- /* Casts to bool should have already been lowered. */ +- default: +- vkd3d_unreachable(); +- } +-} +- +-static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct hlsl_deref *dst, +- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; +- +- sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); +- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) +-{ +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; +- +- sm4_dst_from_node(&instr.dsts[0], dst); +- instr.dst_count = 1; +- +- instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; +- instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; +- instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- +-static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) +-{ +- const struct hlsl_ir_node *arg1 = expr->operands[0].node; +- const struct hlsl_ir_node *arg2 = expr->operands[1].node; +- const struct hlsl_ir_node *arg3 = expr->operands[2].node; +- const struct hlsl_type *dst_type = expr->node.data_type; +- struct vkd3d_string_buffer *dst_type_string; +- +- VKD3D_ASSERT(expr->node.reg.allocated); +- +- if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) +- return; +- +- switch (expr->op) +- { 
+- case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: +- if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) +- write_sm4_rasterizer_sample_count(tpf, &expr->node); +- else +- hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, +- "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); +- break; +- +- case HLSL_OP1_ABS: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_BIT_NOT: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_CAST: +- write_sm4_cast(tpf, expr); +- break; +- +- case HLSL_OP1_CEIL: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_COS: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); +- break; +- +- case HLSL_OP1_DSX: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSX_COARSE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSX_FINE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSY: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSY_COARSE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, 
VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_DSY_FINE: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_EXP2: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_F16TOF32: +- VKD3D_ASSERT(type_is_float(dst_type)); +- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_FLOOR: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_FRACT: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_LOG2: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_LOGIC_NOT: +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_NEG: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_RCP: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- /* SM5 comes with a RCP opcode */ +- if (tpf->ctx->profile->major_version >= 5) +- { +- write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); +- } +- else +- { +- /* For SM4, implement as DIV dst, 1.0, src */ +- struct sm4_instruction instr; +- struct 
hlsl_constant_value one; +- +- VKD3D_ASSERT(type_is_float(dst_type)); +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_DIV; +- +- sm4_dst_from_node(&instr.dsts[0], &expr->node); +- instr.dst_count = 1; +- +- for (unsigned int i = 0; i < 4; i++) +- one.u[i].f = 1.0f; +- sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); +- sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); +- instr.src_count = 2; +- +- write_sm4_instruction(tpf, &instr); +- } +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP1_REINTERPRET: +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_ROUND: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_RSQ: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_SAT: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV +- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), +- &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_SIN: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); +- break; +- +- case HLSL_OP1_SQRT: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP1_TRUNC: +- VKD3D_ASSERT(type_is_float(dst_type)); +- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); +- break; +- +- case HLSL_OP2_ADD: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- 
write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_BIT_AND: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_BIT_OR: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_BIT_XOR: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_DIV: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_DOT: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- switch (arg1->data_type->dimx) +- { +- case 4: +- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); +- break; +- +- case 3: +- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); +- break; +- +- case 2: +- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); +- break; +- +- case 1: +- default: +- vkd3d_unreachable(); +- } +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); +- } +- break; +- +- case HLSL_OP2_EQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, 
&expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_GEQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_LESS: +- { +- const struct hlsl_type *src_type = arg1->data_type; +- +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } +- +- case HLSL_OP2_LOGIC_AND: +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_LOGIC_OR: 
+- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_LSHIFT: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); +- break; +- +- case HLSL_OP2_MAX: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); +- break; +- +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); +- break; +- +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); +- } +- break; ++ vkd3d_unreachable(); ++ } + +- case HLSL_OP2_MIN: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); +- break; ++ if (texel_offset) ++ { ++ if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) ++ { ++ hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, ++ "Offset must resolve to integer literal in the range -8 to 7."); ++ return; ++ } ++ } + +- case HLSL_TYPE_INT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); +- break; ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; + +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); +- break; ++ sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); ++ sm4_src_from_deref(tpf, &instr.srcs[2], sampler, VKD3DSP_WRITEMASK_ALL, &instr); ++ instr.src_count = 3; + +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", 
dst_type_string->buffer); +- } +- break; ++ if (load->load_type == HLSL_RESOURCE_SAMPLE_LOD ++ || load->load_type == HLSL_RESOURCE_SAMPLE_LOD_BIAS) ++ { ++ sm4_src_from_node(tpf, &instr.srcs[3], load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ ++instr.src_count; ++ } ++ else if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) ++ { ++ sm4_src_from_node(tpf, &instr.srcs[3], load->ddx.node, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_node(tpf, &instr.srcs[4], load->ddy.node, VKD3DSP_WRITEMASK_ALL); ++ instr.src_count += 2; ++ } ++ else if (load->load_type == HLSL_RESOURCE_SAMPLE_CMP ++ || load->load_type == HLSL_RESOURCE_SAMPLE_CMP_LZ) ++ { ++ sm4_src_from_node(tpf, &instr.srcs[3], load->cmp.node, VKD3DSP_WRITEMASK_ALL); ++ ++instr.src_count; ++ } + +- case HLSL_OP2_MOD: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_UINT: +- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); +- break; ++ write_sm4_instruction(tpf, &instr); ++} + +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); +- } +- break; ++static void write_sm4_sampleinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *dst = &load->node; ++ struct sm4_instruction instr; + +- case HLSL_OP2_MUL: +- switch (dst_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); +- break; ++ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- /* Using IMUL instead of UMUL because we're taking the low +- * bits, and the native compiler generates IMUL. 
*/ +- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); +- break; ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; ++ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) ++ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); +- } +- break; ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; + +- case HLSL_OP2_NEQUAL: +- { +- const struct hlsl_type *src_type = arg1->data_type; ++ sm4_src_from_deref(tpf, &instr.srcs[0], resource, instr.dsts[0].write_mask, &instr); ++ instr.src_count = 1; + +- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ write_sm4_instruction(tpf, &instr); ++} + +- switch (src_type->e.numeric.type) +- { +- case HLSL_TYPE_FLOAT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); +- break; ++static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_load *load) ++{ ++ const struct hlsl_deref *resource = &load->resource; ++ const struct hlsl_ir_node *dst = &load->node; ++ struct sm4_instruction instr; + +- case HLSL_TYPE_BOOL: +- case HLSL_TYPE_INT: +- case HLSL_TYPE_UINT: +- write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); +- break; ++ if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER ++ || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) ++ { ++ hlsl_fixme(tpf->ctx, &load->node.loc, "resinfo for buffers."); ++ return; ++ } + +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", +- debug_hlsl_type(tpf->ctx, src_type)); +- break; +- } +- break; +- } ++ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + +- case HLSL_OP2_RSHIFT: +- VKD3D_ASSERT(type_is_integer(dst_type)); +- 
VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); +- write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, +- &expr->node, arg1, arg2); +- break; ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_RESINFO; ++ if (dst->data_type->e.numeric.type == HLSL_TYPE_UINT) ++ instr.extra_bits |= VKD3DSI_RESINFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + +- case HLSL_OP3_TERNARY: +- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); +- break; ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; + +- default: +- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); +- } ++ sm4_src_from_node(tpf, &instr.srcs[0], load->lod.node, VKD3DSP_WRITEMASK_ALL); ++ sm4_src_from_deref(tpf, &instr.srcs[1], resource, instr.dsts[0].write_mask, &instr); ++ instr.src_count = 2; + +- hlsl_release_string_buffer(tpf->ctx, dst_type_string); ++ write_sm4_instruction(tpf, &instr); } -static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if *iff) -+static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) ++static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) { struct sm4_instruction instr = { -@@ -5614,7 +6116,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * +- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, ++ .opcode = VKD3D_SM4_OP_IF, ++ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; + +@@ -5614,7 +5278,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * write_sm4_instruction(tpf, &instr); } @@ -19559,46 +26461,66 @@ index 84f641cc316..848e78a34d3 100644 { struct sm4_instruction instr = {0}; -@@ -5653,16 +6155,17 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju - /* Does this variable's data come directly from the API user, rather than 
being - * temporary or from a previous shader stage? - * I.e. is it a uniform or VS input? */ --static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) -+static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var) - { - if (var->is_uniform) - return true; - -- return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; -+ return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX; - } - --static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) -+static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_load *load) - { -+ const struct vkd3d_shader_version *version = &tpf->program->shader_version; - const struct hlsl_type *type = load->node.data_type; - struct sm4_instruction instr; - -@@ -5672,7 +6175,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - instr.dst_count = 1; - - VKD3D_ASSERT(hlsl_is_numeric_type(type)); -- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) -+ if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var)) - { - struct hlsl_constant_value value; - -@@ -5700,7 +6203,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo +@@ -5650,57 +5314,7 @@ static void write_sm4_jump(const struct tpf_writer *tpf, const struct hlsl_ir_ju write_sm4_instruction(tpf, &instr); } +-/* Does this variable's data come directly from the API user, rather than being +- * temporary or from a previous shader stage? +- * I.e. is it a uniform or VS input? 
*/ +-static bool var_is_user_input(struct hlsl_ctx *ctx, const struct hlsl_ir_var *var) +-{ +- if (var->is_uniform) +- return true; +- +- return var->is_input_semantic && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; +-} +- +-static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_load *load) +-{ +- const struct hlsl_type *type = load->node.data_type; +- struct sm4_instruction instr; +- +- memset(&instr, 0, sizeof(instr)); +- +- sm4_dst_from_node(&instr.dsts[0], &load->node); +- instr.dst_count = 1; +- +- VKD3D_ASSERT(hlsl_is_numeric_type(type)); +- if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) +- { +- struct hlsl_constant_value value; +- +- /* Uniform bools can be specified as anything, but internal bools always +- * have 0 for false and ~0 for true. Normalize that here. */ +- +- instr.opcode = VKD3D_SM4_OP_MOVC; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); +- +- memset(&value, 0xff, sizeof(value)); +- sm4_src_from_constant_value(&instr.srcs[1], &value, type->dimx, instr.dsts[0].write_mask); +- memset(&value, 0, sizeof(value)); +- sm4_src_from_constant_value(&instr.srcs[2], &value, type->dimx, instr.dsts[0].write_mask); +- instr.src_count = 3; +- } +- else +- { +- instr.opcode = VKD3D_SM4_OP_MOV; +- +- sm4_src_from_deref(tpf, &instr.srcs[0], &load->src, instr.dsts[0].write_mask, &instr); +- instr.src_count = 1; +- } +- +- write_sm4_instruction(tpf, &instr); +-} +- -static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_loop *loop) -+static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) { struct sm4_instruction instr = { -@@ -5715,10 +6218,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, const struct hlsl_ir_lo +@@ -5715,10 +5329,11 @@ static void write_sm4_loop(const struct tpf_writer *tpf, 
const struct hlsl_ir_lo write_sm4_instruction(tpf, &instr); } @@ -19611,7 +26533,7 @@ index 84f641cc316..848e78a34d3 100644 struct vkd3d_shader_src_param *src; struct sm4_instruction instr; -@@ -5735,7 +6239,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ +@@ -5735,7 +5350,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ { if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset)) { @@ -19620,7 +26542,7 @@ index 84f641cc316..848e78a34d3 100644 { hlsl_error(tpf->ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET, "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5."); -@@ -5756,7 +6260,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ +@@ -5756,7 +5371,7 @@ static void write_sm4_gather(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } @@ -19629,61 +26551,292 @@ index 84f641cc316..848e78a34d3 100644 { const struct hlsl_ir_node *texel_offset = load->texel_offset.node; const struct hlsl_ir_node *sample_index = load->sample_index.node; -@@ -5825,7 +6329,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h +@@ -5825,45 +5440,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h } } -static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct hlsl_ir_resource_store *store) -+static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) - { - struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); - -@@ -5844,7 +6348,7 @@ static void write_sm4_resource_store(const struct tpf_writer *tpf, const struct - write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); - } - +-{ +- struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); +- +- if 
(!store->resource.var->is_uniform) +- { +- hlsl_fixme(tpf->ctx, &store->node.loc, "Store to non-uniform resource variable."); +- return; +- } +- +- if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) +- { +- hlsl_fixme(tpf->ctx, &store->node.loc, "Structured buffers store is not implemented."); +- return; +- } +- +- write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); +-} +- -static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_store *store) -+static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) - { - const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm4_instruction instr; -@@ -5853,7 +6357,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_MOV; - +-{ +- const struct hlsl_ir_node *rhs = store->rhs.node; +- struct sm4_instruction instr; +- uint32_t writemask; +- +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; +- - sm4_register_from_deref(tpf->ctx, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); -+ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &writemask, &store->lhs, &instr); - instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); - instr.dst_count = 1; - -@@ -5863,7 +6367,7 @@ static void write_sm4_store(const struct tpf_writer *tpf, const struct hlsl_ir_s - write_sm4_instruction(tpf, &instr); - } - +- instr.dsts[0].write_mask = hlsl_combine_writemasks(writemask, store->writemask); +- instr.dst_count = 1; +- +- sm4_src_from_node(tpf, &instr.srcs[0], rhs, instr.dsts[0].write_mask); +- instr.src_count = 1; +- +- write_sm4_instruction(tpf, &instr); +-} +- -static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_switch *s) -+static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) ++static void write_sm4_switch(struct 
tpf_compiler *tpf, const struct hlsl_ir_switch *s) { const struct hlsl_ir_node *selector = s->selector.node; struct hlsl_ir_switch_case *c; -@@ -5903,7 +6407,7 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ +@@ -5903,30 +5480,176 @@ static void write_sm4_switch(const struct tpf_writer *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir_swizzle *swizzle) -+static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ir_swizzle *swizzle) ++static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { - unsigned int hlsl_swizzle; - struct sm4_instruction instr; -@@ -5924,7 +6428,7 @@ static void write_sm4_swizzle(const struct tpf_writer *tpf, const struct hlsl_ir +- unsigned int hlsl_swizzle; +- struct sm4_instruction instr; +- uint32_t writemask; ++ const struct vkd3d_sm4_opcode_info *info; ++ struct sm4_instruction instr = {0}; ++ unsigned int dst_count, src_count; + +- memset(&instr, 0, sizeof(instr)); +- instr.opcode = VKD3D_SM4_OP_MOV; ++ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); ++ VKD3D_ASSERT(info); + +- sm4_dst_from_node(&instr.dsts[0], &swizzle->node); +- instr.dst_count = 1; ++ dst_count = opcode_info_get_dst_count(info); ++ src_count = opcode_info_get_src_count(info); + +- sm4_register_from_node(&instr.srcs[0].reg, &writemask, swizzle->val.node); +- hlsl_swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask), +- swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].write_mask); +- instr.srcs[0].swizzle = swizzle_from_sm4(hlsl_swizzle); +- instr.src_count = 1; ++ if (ins->dst_count != dst_count) ++ { ++ ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n", ++ ins->dst_count, ins->opcode, dst_count); ++ tpf->result = VKD3D_ERROR_INVALID_SHADER; ++ return; ++ } ++ if 
(ins->src_count != src_count) ++ { ++ ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n", ++ ins->src_count, ins->opcode, src_count); ++ tpf->result = VKD3D_ERROR_INVALID_SHADER; ++ return; ++ } ++ ++ instr.opcode = info->opcode; ++ instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ instr.dst_count = ins->dst_count; ++ instr.src_count = ins->src_count; ++ ++ for (unsigned int i = 0; i < ins->dst_count; ++i) ++ { ++ instr.dsts[i] = ins->dst[i]; ++ ++ if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE) ++ { ++ /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. */ ++ VKD3D_ASSERT(ins->dst_count == 1); ++ instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE; ++ instr.extra_bits |= VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ } ++ } ++ for (unsigned int i = 0; i < ins->src_count; ++i) ++ instr.srcs[i] = ins->src[i]; + write_sm4_instruction(tpf, &instr); } -static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_block *block) -+static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) ++static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) ++{ ++ switch (ins->opcode) ++ { ++ case VKD3DSIH_DCL_TEMPS: ++ tpf_dcl_temps(tpf, ins->declaration.count); ++ break; ++ ++ case VKD3DSIH_DCL_INDEXABLE_TEMP: ++ tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT: ++ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_PS: ++ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_PS_SGV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_PS_SIV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, 
&ins->declaration.register_semantic, ins->flags); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_SGV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_DCL_INPUT_SIV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_DCL_OUTPUT: ++ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, &ins->declaration.dst, 0); ++ break; ++ ++ case VKD3DSIH_DCL_OUTPUT_SIV: ++ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); ++ break; ++ ++ case VKD3DSIH_ADD: ++ case VKD3DSIH_AND: ++ case VKD3DSIH_DIV: ++ case VKD3DSIH_DP2: ++ case VKD3DSIH_DP3: ++ case VKD3DSIH_DP4: ++ case VKD3DSIH_DSX: ++ case VKD3DSIH_DSX_COARSE: ++ case VKD3DSIH_DSX_FINE: ++ case VKD3DSIH_DSY: ++ case VKD3DSIH_DSY_COARSE: ++ case VKD3DSIH_DSY_FINE: ++ case VKD3DSIH_EQO: ++ case VKD3DSIH_EXP: ++ case VKD3DSIH_F16TOF32: ++ case VKD3DSIH_F32TOF16: ++ case VKD3DSIH_FRC: ++ case VKD3DSIH_FTOI: ++ case VKD3DSIH_FTOU: ++ case VKD3DSIH_GEO: ++ case VKD3DSIH_IADD: ++ case VKD3DSIH_IEQ: ++ case VKD3DSIH_IGE: ++ case VKD3DSIH_ILT: ++ case VKD3DSIH_IMAD: ++ case VKD3DSIH_IMAX: ++ case VKD3DSIH_IMIN: ++ case VKD3DSIH_IMUL: ++ case VKD3DSIH_INE: ++ case VKD3DSIH_INEG: ++ case VKD3DSIH_ISHL: ++ case VKD3DSIH_ISHR: ++ case VKD3DSIH_ITOF: ++ case VKD3DSIH_LOG: ++ case VKD3DSIH_LTO: ++ case VKD3DSIH_MAD: ++ case VKD3DSIH_MAX: ++ case VKD3DSIH_MIN: ++ case VKD3DSIH_MOV: ++ case VKD3DSIH_MOVC: ++ case VKD3DSIH_MUL: ++ case VKD3DSIH_NEU: ++ case VKD3DSIH_NOT: ++ case VKD3DSIH_OR: ++ case VKD3DSIH_RCP: ++ case VKD3DSIH_ROUND_NE: ++ case VKD3DSIH_ROUND_NI: ++ case VKD3DSIH_ROUND_PI: ++ case VKD3DSIH_ROUND_Z: ++ case VKD3DSIH_RSQ: ++ case VKD3DSIH_SAMPLE_INFO: ++ case VKD3DSIH_SINCOS: ++ case VKD3DSIH_SQRT: ++ case VKD3DSIH_STORE_RAW: ++ case VKD3DSIH_STORE_UAV_TYPED: ++ case VKD3DSIH_UDIV: ++ case VKD3DSIH_UGE: ++ case VKD3DSIH_ULT: ++ case 
VKD3DSIH_UMAX: ++ case VKD3DSIH_UMIN: ++ case VKD3DSIH_USHR: ++ case VKD3DSIH_UTOF: ++ case VKD3DSIH_XOR: ++ tpf_simple_instruction(tpf, ins); ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ break; ++ } ++} ++ ++static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr; ++ unsigned int vsir_instr_idx; -@@ -5998,18 +6502,65 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { +@@ -5952,10 +5675,6 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + case HLSL_IR_CONSTANT: + vkd3d_unreachable(); + +- case HLSL_IR_EXPR: +- write_sm4_expr(tpf, hlsl_ir_expr(instr)); +- break; +- + case HLSL_IR_IF: + write_sm4_if(tpf, hlsl_ir_if(instr)); + break; +@@ -5964,32 +5683,21 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + write_sm4_jump(tpf, hlsl_ir_jump(instr)); + break; + +- case HLSL_IR_LOAD: +- write_sm4_load(tpf, hlsl_ir_load(instr)); +- break; +- + case HLSL_IR_RESOURCE_LOAD: + write_sm4_resource_load(tpf, hlsl_ir_resource_load(instr)); + break; + +- case HLSL_IR_RESOURCE_STORE: +- write_sm4_resource_store(tpf, hlsl_ir_resource_store(instr)); +- break; +- + case HLSL_IR_LOOP: + write_sm4_loop(tpf, hlsl_ir_loop(instr)); + break; + +- case HLSL_IR_STORE: +- write_sm4_store(tpf, hlsl_ir_store(instr)); +- break; +- + case HLSL_IR_SWITCH: + write_sm4_switch(tpf, hlsl_ir_switch(instr)); + break; + +- case HLSL_IR_SWIZZLE: +- write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); ++ case HLSL_IR_VSIR_INSTRUCTION_REF: ++ vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; ++ tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); + break; + + default: +@@ -5998,18 +5706,26 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc } } @@ -19692,48 +26845,8 @@ index 
84f641cc316..848e78a34d3 100644 +static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) { - const struct hlsl_profile_info *profile = ctx->profile; -+ struct hlsl_ctx *ctx = tpf->ctx; -+ const struct hlsl_scope *scope; -+ const struct hlsl_ir_var *var; -+ uint32_t temp_count; -+ -+ compute_liveness(ctx, func); -+ mark_indexable_vars(ctx, func); -+ temp_count = allocate_temp_registers(ctx, func); -+ if (ctx->result) -+ return; -+ -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if ((var->is_input_semantic && var->last_read) -+ || (var->is_output_semantic && var->first_write)) -+ tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); -+ } -+ + if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) -+ write_sm4_dcl_thread_group(tpf, ctx->thread_count); -+ -+ if (temp_count) -+ write_sm4_dcl_temps(tpf, temp_count); -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) -+ continue; -+ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -+ continue; -+ -+ if (var->indexable) -+ { -+ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -+ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -+ -+ write_sm4_dcl_indexable_temp(tpf, id, size, 4); -+ } -+ } -+ } ++ tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); + + write_sm4_block(tpf, &func->body); + @@ -19752,10 +26865,11 @@ index 84f641cc316..848e78a34d3 100644 + struct hlsl_ctx *ctx = tpf->ctx; size_t token_count_position; - struct tpf_writer tpf; ++ uint32_t global_flags = 0; static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = { -@@ -6024,17 +6575,28 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, +@@ -6024,17 +5740,54 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, 
VKD3D_SM4_LIB, }; @@ -19768,6 +26882,27 @@ index 84f641cc316..848e78a34d3 100644 + put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); token_count_position = put_u32(&buffer, 0); ++ if (version->major == 4) ++ { ++ for (i = 0; i < extern_resources_count; ++i) ++ { ++ const struct extern_resource *resource = &extern_resources[i]; ++ const struct hlsl_type *type = resource->component_type; ++ ++ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) ++ { ++ global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; ++ break; ++ } ++ } ++ } ++ ++ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) ++ global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; ++ ++ if (global_flags) ++ write_sm4_dcl_global_flags(tpf, global_flags); ++ + if (version->type == VKD3D_SHADER_TYPE_HULL) + { + tpf_write_hs_decls(tpf); @@ -19778,6 +26913,11 @@ index 84f641cc316..848e78a34d3 100644 + tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); + tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); + } ++ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) ++ { ++ tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ ++ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); ++ } + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { @@ -19787,7 +26927,7 @@ index 84f641cc316..848e78a34d3 100644 } for (i = 0; i < extern_resources_count; ++i) -@@ -6042,59 +6604,40 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, +@@ -6042,59 +5795,37 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct extern_resource *resource = &extern_resources[i]; if (resource->regset == HLSL_REGSET_SAMPLERS) @@ -19798,16 +26938,14 @@ index 84f641cc316..848e78a34d3 100644 + write_sm4_dcl_textures(tpf, resource, false); else if (resource->regset == HLSL_REGSET_UAVS) - write_sm4_dcl_textures(&tpf, resource, true); -+ 
write_sm4_dcl_textures(tpf, resource, true); - } - +- } +- - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write)) - write_sm4_dcl_semantic(&tpf, var); -- } -+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -+ write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); ++ write_sm4_dcl_textures(tpf, resource, true); + } - if (profile->type == VKD3D_SHADER_TYPE_COMPUTE) - write_sm4_dcl_thread_group(&tpf, ctx->thread_count); @@ -19862,7 +27000,7 @@ index 84f641cc316..848e78a34d3 100644 uint64_t *flags; flags = vkd3d_calloc(1, sizeof(*flags)); -@@ -6110,29 +6653,94 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) +@@ -6110,29 +5841,101 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE, * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. 
*/ @@ -19934,6 +27072,7 @@ index 84f641cc316..848e78a34d3 100644 + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) +{ ++ enum vkd3d_shader_type shader_type = program->shader_version.type; + struct tpf_compiler tpf = {0}; + struct sm4_stat stat = {0}; size_t i; @@ -19946,6 +27085,11 @@ index 84f641cc316..848e78a34d3 100644 - write_sm4_rdef(ctx, &dxbc); - write_sm4_shdr(ctx, entry_func, &dxbc); - write_sm4_sfi0(ctx, &dxbc); +- +- if (!(ret = ctx->result)) +- ret = dxbc_writer_write(&dxbc, out); +- for (i = 0; i < dxbc.section_count; ++i) +- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); + tpf.ctx = ctx; + tpf.program = program; + tpf.buffer = NULL; @@ -19955,24 +27099,27 @@ index 84f641cc316..848e78a34d3 100644 + + tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); + tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) ++ if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) + tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); + write_sm4_rdef(ctx, &tpf.dxbc); + tpf_write_shdr(&tpf, entry_func); + tpf_write_sfi0(&tpf); + tpf_write_stat(&tpf); - - if (!(ret = ctx->result)) -- ret = dxbc_writer_write(&dxbc, out); -- for (i = 0; i < dxbc.section_count; ++i) -- vkd3d_shader_free_shader_code(&dxbc.sections[i].data); ++ ++ ret = VKD3D_OK; ++ if (ctx->result) ++ ret = ctx->result; ++ if (tpf.result) ++ ret = tpf.result; ++ ++ if (!ret) + ret = dxbc_writer_write(&tpf.dxbc, out); + for (i = 0; i < tpf.dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 306c1ca0dd8..ca012d4948a 100644 +index 306c1ca0dd8..db61eec8f28 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ @@ -19984,14 +27131,15 @@ index 306c1ca0dd8..ca012d4948a 100644 static inline int char_to_int(char c) { if ('0' <= c && c <= '9') -@@ -443,20 +445,47 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char +@@ -443,30 +445,57 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char bytecode_set_bytes(buffer, offset, string, length); } -static void vkd3d_shader_dump_blob(const char *path, const char *profile, - const char *suffix, const void *data, size_t size) +struct shader_dump_data -+{ + { +- static unsigned int shader_id = 0; + uint8_t checksum[16]; + const char *path; + const char *profile; @@ -20001,8 +27149,7 @@ index 306c1ca0dd8..ca012d4948a 100644 + +static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, + const void *data, size_t size, bool source) - { -- static unsigned int shader_id = 0; ++{ + static const char hexadecimal_digits[] = "0123456789abcdef"; + const uint8_t *checksum = dump_data->checksum; + char str_checksum[33]; @@ -20024,12 +27171,12 @@ index 306c1ca0dd8..ca012d4948a 100644 + str_checksum[32] = '\0'; + + pos = snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s", dump_data->path, str_checksum); -+ -+ if (dump_data->profile) -+ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); - if (profile) - snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u-%s.%s", path, id, profile, suffix); ++ if (dump_data->profile) ++ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); ++ + if (source) + pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix); else @@ -20040,6 +27187,19 @@ index 306c1ca0dd8..ca012d4948a 100644 if ((f = fopen(filename, "wb"))) { if (fwrite(data, 1, size, f) != size) +- ERR("Failed to write shader to 
%s.\n", filename); ++ WARN("Failed to write shader to %s.\n", filename); + if (fclose(f)) +- ERR("Failed to close stream %s.\n", filename); ++ WARN("Failed to close stream %s.\n", filename); + } + else + { +- ERR("Failed to open %s for dumping shader.\n", filename); ++ WARN("Failed to open %s for dumping shader.\n", filename); + } + } + @@ -488,37 +517,61 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t } } @@ -20117,7 +27277,98 @@ index 306c1ca0dd8..ca012d4948a 100644 } static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) -@@ -1436,7 +1489,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh +@@ -627,6 +680,53 @@ static int vkd3d_shader_validate_compile_info(const struct vkd3d_shader_compile_ + return VKD3D_OK; + } + ++static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, ++ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) ++{ ++ enum vkd3d_result ret; ++ ++ switch (compile_info->source_type) ++ { ++ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: ++ ret = d3dbc_parse(compile_info, config_flags, message_context, program); ++ break; ++ ++ case VKD3D_SHADER_SOURCE_DXBC_TPF: ++ ret = tpf_parse(compile_info, config_flags, message_context, program); ++ break; ++ ++ case VKD3D_SHADER_SOURCE_DXBC_DXIL: ++ ret = dxil_parse(compile_info, config_flags, message_context, program); ++ break; ++ ++ default: ++ ERR("Unsupported source type %#x.\n", compile_info->source_type); ++ ret = VKD3D_ERROR_INVALID_ARGUMENT; ++ break; ++ } ++ ++ if (ret < 0) ++ { ++ WARN("Failed to parse shader.\n"); ++ return ret; ++ } ++ ++ if ((ret = vsir_program_validate(program, config_flags, compile_info->source_name, message_context)) < 0) ++ { ++ WARN("Failed to validate shader after parsing, ret %d.\n", ret); ++ ++ if (TRACE_ON()) ++ vsir_program_trace(program); ++ ++ vsir_program_cleanup(program); ++ return ret; ++ } 
++ ++ if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE) ++ ret = vsir_program_transform_early(program, config_flags, compile_info, message_context); ++ return ret; ++} ++ + void vkd3d_shader_free_messages(char *messages) + { + TRACE("messages %p.\n", messages); +@@ -707,6 +807,9 @@ struct vkd3d_shader_scan_context + + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; + size_t combined_samplers_size; ++ ++ enum vkd3d_shader_tessellator_output_primitive output_primitive; ++ enum vkd3d_shader_tessellator_partitioning partitioning; + }; + + static VKD3D_PRINTF_FUNC(3, 4) void vkd3d_shader_scan_error(struct vkd3d_shader_scan_context *context, +@@ -1164,6 +1267,12 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + VKD3D_SHADER_RESOURCE_BUFFER, VKD3D_SHADER_RESOURCE_DATA_UINT, 0, + instruction->declaration.structured_resource.byte_stride, false, instruction->flags); + break; ++ case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: ++ context->output_primitive = instruction->declaration.tessellator_output_primitive; ++ break; ++ case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: ++ context->partitioning = instruction->declaration.tessellator_partitioning; ++ break; + case VKD3DSIH_IF: + case VKD3DSIH_IFC: + cf_info = vkd3d_shader_scan_push_cf_info(context); +@@ -1404,6 +1513,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + struct vkd3d_shader_scan_descriptor_info1 *descriptor_info1) + { + struct vkd3d_shader_scan_combined_resource_sampler_info *combined_sampler_info; ++ struct vkd3d_shader_scan_hull_shader_tessellation_info *tessellation_info; + struct vkd3d_shader_scan_descriptor_info1 local_descriptor_info1 = {0}; + struct vkd3d_shader_scan_descriptor_info *descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; +@@ -1432,11 +1542,13 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + descriptor_info1 = 
&local_descriptor_info1; + } + ++ tessellation_info = vkd3d_find_struct(compile_info->next, SCAN_HULL_SHADER_TESSELLATION_INFO); ++ + vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, descriptor_info1, combined_sampler_info, message_context); if (TRACE_ON()) @@ -20126,7 +27377,20 @@ index 306c1ca0dd8..ca012d4948a 100644 for (i = 0; i < program->instructions.count; ++i) { -@@ -1497,6 +1550,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh +@@ -1475,6 +1587,12 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + if (!ret && descriptor_info) + ret = convert_descriptor_info(descriptor_info, descriptor_info1); + ++ if (!ret && tessellation_info) ++ { ++ tessellation_info->output_primitive = context.output_primitive; ++ tessellation_info->partitioning = context.partitioning; ++ } ++ + if (ret < 0) + { + if (combined_sampler_info) +@@ -1497,6 +1615,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; @@ -20134,7 +27398,7 @@ index 306c1ca0dd8..ca012d4948a 100644 int ret; TRACE("compile_info %p, messages %p.\n", compile_info, messages); -@@ -1511,7 +1565,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char +@@ -1511,7 +1630,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_message_context_init(&message_context, compile_info->log_level); @@ -20144,7 +27408,40 @@ index 306c1ca0dd8..ca012d4948a 100644 if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { -@@ -1565,6 +1620,7 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1523,31 +1643,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + uint64_t config_flags = 
vkd3d_shader_init_config_flags(); + struct vsir_program program; + +- switch (compile_info->source_type) +- { +- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: +- ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); +- break; +- +- case VKD3D_SHADER_SOURCE_DXBC_TPF: +- ret = tpf_parse(compile_info, config_flags, &message_context, &program); +- break; +- +- case VKD3D_SHADER_SOURCE_DXBC_DXIL: +- ret = dxil_parse(compile_info, config_flags, &message_context, &program); +- break; +- +- default: +- ERR("Unsupported source type %#x.\n", compile_info->source_type); +- ret = VKD3D_ERROR_INVALID_ARGUMENT; +- break; +- } +- +- if (ret < 0) +- { +- WARN("Failed to parse shader.\n"); +- } +- else ++ if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) + { + ret = vsir_program_scan(&program, compile_info, &message_context, NULL); + vsir_program_cleanup(&program); +@@ -1565,6 +1661,7 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { @@ -20152,7 +27449,7 @@ index 306c1ca0dd8..ca012d4948a 100644 struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; struct vkd3d_shader_compile_info scan_info; int ret; -@@ -1578,9 +1634,14 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1578,9 +1675,14 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, break; case VKD3D_SHADER_TARGET_GLSL: @@ -20168,21 +27465,21 @@ index 306c1ca0dd8..ca012d4948a 100644 vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; -@@ -1593,6 +1654,13 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1593,6 +1695,13 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; + case 
VKD3D_SHADER_TARGET_MSL: + if ((ret = vsir_program_scan(program, &scan_info, message_context, &scan_descriptor_info)) < 0) + return ret; -+ ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, message_context); ++ ret = msl_compile(program, config_flags, &scan_descriptor_info, compile_info, out, message_context); + vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); + break; + default: /* Validation should prevent us from reaching this. */ vkd3d_unreachable(); -@@ -1620,6 +1688,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1620,6 +1729,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { struct vkd3d_shader_message_context message_context; @@ -20190,7 +27487,7 @@ index 306c1ca0dd8..ca012d4948a 100644 int ret; TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); -@@ -1634,12 +1703,17 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, +@@ -1634,48 +1744,32 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_message_context_init(&message_context, compile_info->log_level); @@ -20209,16 +27506,47 @@ index 306c1ca0dd8..ca012d4948a 100644 else { uint64_t config_flags = vkd3d_shader_init_config_flags(); -@@ -1676,6 +1750,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vsir_program program; + +- switch (compile_info->source_type) +- { +- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: +- ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); +- break; +- +- case VKD3D_SHADER_SOURCE_DXBC_TPF: +- ret = tpf_parse(compile_info, config_flags, &message_context, &program); +- break; +- +- case VKD3D_SHADER_SOURCE_DXBC_DXIL: +- ret = dxil_parse(compile_info, config_flags, &message_context, &program); +- break; +- +- default: +- ERR("Unsupported source type %#x.\n", 
compile_info->source_type); +- ret = VKD3D_ERROR_INVALID_ARGUMENT; +- break; +- } +- +- if (ret < 0) +- { +- WARN("Failed to parse shader.\n"); +- } +- else ++ if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) + { + ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context); + vsir_program_cleanup(&program); } } -+ vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); ++ if (ret >= 0) ++ vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); + vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) ret = VKD3D_ERROR_OUT_OF_MEMORY; -@@ -1777,6 +1853,8 @@ void shader_signature_cleanup(struct shader_signature *signature) +@@ -1777,6 +1871,8 @@ void shader_signature_cleanup(struct shader_signature *signature) } vkd3d_free(signature->elements); signature->elements = NULL; @@ -20227,7 +27555,7 @@ index 306c1ca0dd8..ca012d4948a 100644 } int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, -@@ -1868,6 +1946,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns +@@ -1868,6 +1964,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns #ifdef VKD3D_SHADER_UNSUPPORTED_DXIL VKD3D_SHADER_SOURCE_DXBC_DXIL, #endif @@ -20235,7 +27563,7 @@ index 306c1ca0dd8..ca012d4948a 100644 }; TRACE("count %p.\n", count); -@@ -1888,6 +1967,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1888,6 +1985,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, #ifdef VKD3D_SHADER_UNSUPPORTED_GLSL VKD3D_SHADER_TARGET_GLSL, @@ -20245,7 +27573,7 @@ index 306c1ca0dd8..ca012d4948a 100644 #endif }; -@@ -1923,6 +2005,11 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1923,6 +2023,11 @@ const enum vkd3d_shader_target_type 
*vkd3d_shader_get_supported_target_types( }; #endif @@ -20257,7 +27585,7 @@ index 306c1ca0dd8..ca012d4948a 100644 TRACE("source_type %#x, count %p.\n", source_type, count); switch (source_type) -@@ -1945,6 +2032,10 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( +@@ -1945,6 +2050,10 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( return dxbc_dxil_types; #endif @@ -20269,7 +27597,7 @@ index 306c1ca0dd8..ca012d4948a 100644 *count = 0; return NULL; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index ef66a8ca07a..9df538a0da0 100644 +index ef66a8ca07a..ad04972b3fb 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -59,6 +59,9 @@ @@ -20347,7 +27675,23 @@ index ef66a8ca07a..9df538a0da0 100644 }; enum vkd3d_shader_opcode -@@ -625,6 +654,13 @@ enum vkd3d_shader_register_type +@@ -556,6 +585,8 @@ enum vkd3d_shader_opcode + VKD3DSIH_XOR, + + VKD3DSIH_INVALID, ++ ++ VKD3DSIH_COUNT, + }; + + enum vkd3d_shader_register_type +@@ -619,12 +650,20 @@ enum vkd3d_shader_register_type + VKD3DSPR_WAVELANECOUNT, + VKD3DSPR_WAVELANEINDEX, + VKD3DSPR_PARAMETER, ++ VKD3DSPR_POINT_COORD, + + VKD3DSPR_COUNT, + VKD3DSPR_INVALID = ~0u, }; @@ -20361,7 +27705,7 @@ index ef66a8ca07a..9df538a0da0 100644 enum vkd3d_shader_register_precision { VKD3D_SHADER_REGISTER_PRECISION_DEFAULT, -@@ -642,9 +678,6 @@ enum vkd3d_data_type +@@ -642,9 +681,6 @@ enum vkd3d_data_type { VKD3D_DATA_FLOAT, VKD3D_DATA_INT, @@ -20371,7 +27715,16 @@ index ef66a8ca07a..9df538a0da0 100644 VKD3D_DATA_UINT, VKD3D_DATA_UNORM, VKD3D_DATA_SNORM, -@@ -1042,6 +1075,9 @@ enum vkd3d_shader_input_sysval_semantic +@@ -740,7 +776,7 @@ enum vkd3d_shader_interpolation_mode + VKD3DSIM_COUNT = 8, + }; + +-enum vkd3d_shader_global_flags ++enum vsir_global_flags + { + VKD3DSGF_REFACTORING_ALLOWED = 0x01, + 
VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x02, +@@ -1042,6 +1078,9 @@ enum vkd3d_shader_input_sysval_semantic struct signature_element { @@ -20381,7 +27734,16 @@ index ef66a8ca07a..9df538a0da0 100644 unsigned int sort_index; const char *semantic_name; unsigned int semantic_index; -@@ -1145,7 +1181,7 @@ struct vkd3d_shader_tgsm_structured +@@ -1080,6 +1119,8 @@ static inline bool vsir_sysval_semantic_is_clip_cull(enum vkd3d_shader_sysval_se + + struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature, + unsigned int reg_idx, unsigned int write_mask); ++bool vsir_signature_find_sysval(const struct shader_signature *signature, ++ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); + void shader_signature_cleanup(struct shader_signature *signature); + + struct dxbc_shader_desc +@@ -1145,7 +1186,7 @@ struct vkd3d_shader_tgsm_structured bool zero_init; }; @@ -20390,7 +27752,16 @@ index ef66a8ca07a..9df538a0da0 100644 { unsigned int x, y, z; }; -@@ -1224,7 +1260,7 @@ struct vkd3d_shader_instruction +@@ -1210,7 +1251,7 @@ struct vkd3d_shader_instruction + const struct vkd3d_shader_src_param *predicate; + union + { +- enum vkd3d_shader_global_flags global_flags; ++ enum vsir_global_flags global_flags; + struct vkd3d_shader_semantic semantic; + struct vkd3d_shader_register_semantic register_semantic; + struct vkd3d_shader_primitive_type primitive_type; +@@ -1224,7 +1265,7 @@ struct vkd3d_shader_instruction struct vkd3d_shader_structured_resource structured_resource; struct vkd3d_shader_tgsm_raw tgsm_raw; struct vkd3d_shader_tgsm_structured tgsm_structured; @@ -20399,7 +27770,7 @@ index ef66a8ca07a..9df538a0da0 100644 enum vkd3d_tessellator_domain tessellator_domain; enum vkd3d_shader_tessellator_output_primitive tessellator_output_primitive; enum vkd3d_shader_tessellator_partitioning tessellator_partitioning; -@@ -1344,8 +1380,6 @@ bool shader_instruction_array_add_icb(struct 
vkd3d_shader_instruction_array *ins +@@ -1344,8 +1385,6 @@ bool shader_instruction_array_add_icb(struct vkd3d_shader_instruction_array *ins struct vkd3d_shader_immediate_constant_buffer *icb); bool shader_instruction_array_clone_instruction(struct vkd3d_shader_instruction_array *instructions, unsigned int dst, unsigned int src); @@ -20408,7 +27779,7 @@ index ef66a8ca07a..9df538a0da0 100644 void shader_instruction_array_destroy(struct vkd3d_shader_instruction_array *instructions); enum vkd3d_shader_config_flags -@@ -1353,6 +1387,12 @@ enum vkd3d_shader_config_flags +@@ -1353,6 +1392,19 @@ enum vkd3d_shader_config_flags VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001, }; @@ -20417,11 +27788,18 @@ index ef66a8ca07a..9df538a0da0 100644 + VSIR_CF_STRUCTURED, + VSIR_CF_BLOCKS, +}; ++ ++enum vsir_normalisation_level ++{ ++ VSIR_NOT_NORMALISED, ++ VSIR_NORMALISED_HULL_CONTROL_POINT_IO, ++ VSIR_FULLY_NORMALISED_IO, ++}; + struct vsir_program { struct vkd3d_shader_version shader_version; -@@ -1367,11 +1407,16 @@ struct vsir_program +@@ -1367,11 +1419,19 @@ struct vsir_program bool free_parameters; unsigned int input_control_point_count, output_control_point_count; @@ -20430,23 +27808,28 @@ index ef66a8ca07a..9df538a0da0 100644 unsigned int block_count; unsigned int temp_count; unsigned int ssa_count; ++ enum vsir_global_flags global_flags; bool use_vocp; + bool has_point_size; ++ bool has_point_coord; ++ uint8_t diffuse_written_mask; + enum vsir_control_flow_type cf_type; -+ bool normalised_io; -+ bool normalised_hull_cp_io; ++ enum vsir_normalisation_level normalisation_level; ++ enum vkd3d_tessellator_domain tess_domain; const char **block_names; size_t block_name_count; -@@ -1384,11 +1429,17 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1384,11 +1444,19 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( const struct vsir_program 
*program, enum vkd3d_shader_parameter_name name); bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve); -enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -+ bool normalised_io); ++ enum vsir_normalisation_level normalisation_level); +enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); ++enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, const char *source_name, struct vkd3d_shader_message_context *message_context); @@ -20458,7 +27841,20 @@ index ef66a8ca07a..9df538a0da0 100644 static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) -@@ -1445,7 +1496,7 @@ struct vkd3d_shader_scan_descriptor_info1 +@@ -1417,12 +1485,6 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_pr + void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, + enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); + +-static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags) +-{ +- return vsir_program_validate(parser->program, config_flags, +- parser->location.source_name, parser->message_context); +-} +- + struct vkd3d_shader_descriptor_info1 + { + enum vkd3d_shader_descriptor_type type; +@@ -1445,7 +1507,7 @@ struct vkd3d_shader_scan_descriptor_info1 unsigned int descriptor_count; }; @@ -20467,7 +27863,7 @@ index ef66a8ca07a..9df538a0da0 100644 const char *shader_get_type_prefix(enum vkd3d_shader_type type); -@@ -1465,6 +1516,7 @@ enum vsir_asm_flags +@@ -1465,6 +1527,7 @@ enum vsir_asm_flags { VSIR_ASM_FLAG_NONE = 0, VSIR_ASM_FLAG_DUMP_TYPES = 0x1, @@ -20475,7 +27871,7 @@ index ef66a8ca07a..9df538a0da0 100644 }; enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, -@@ -1549,18 +1601,29 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st +@@ -1549,18 +1612,30 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, enum vkd3d_shader_error error, const char *format, va_list args); @@ -20491,6 +27887,7 @@ index ef66a8ca07a..9df538a0da0 100644 + uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); +bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, + const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); ++bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); +bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, + const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, + const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); @@ -20506,7 +27903,7 
@@ index ef66a8ca07a..9df538a0da0 100644 void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); -@@ -1570,8 +1633,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, +@@ -1570,8 +1645,10 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); int glsl_compile(struct vsir_program *program, uint64_t config_flags, @@ -20519,14 +27916,15 @@ index ef66a8ca07a..9df538a0da0 100644 #define SPIRV_MAX_SRC_COUNT 6 -@@ -1580,7 +1645,17 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1580,7 +1657,18 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); +int msl_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, ++ struct vkd3d_shader_message_context *message_context); + +enum vkd3d_md5_variant +{ @@ -20538,7 +27936,7 @@ index ef66a8ca07a..9df538a0da0 100644 int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -@@ -1853,7 +1928,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, +@@ -1853,7 +1941,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) #define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) @@ -20548,7 +27946,7 @@ index ef66a8ca07a..9df538a0da0 100644 struct dxbc_writer { diff 
--git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index dcc7690876f..5495809fcb9 100644 +index dcc7690876f..a55a97f6f2f 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -19,6 +19,7 @@ @@ -20559,7 +27957,16 @@ index dcc7690876f..5495809fcb9 100644 static void d3d12_fence_incref(struct d3d12_fence *fence); static void d3d12_fence_decref(struct d3d12_fence *fence); -@@ -2451,6 +2452,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL +@@ -2004,6 +2005,8 @@ static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *li + + vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, + state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); ++ memset(bindings->vk_uav_counter_views, 0, ++ state->uav_counters.binding_count * sizeof(*bindings->vk_uav_counter_views)); + bindings->uav_counters_dirty = true; + } + } +@@ -2451,6 +2454,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL } list->is_recording = false; @@ -20567,7 +27974,7 @@ index dcc7690876f..5495809fcb9 100644 if (!list->is_valid) { -@@ -2479,7 +2481,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, +@@ -2479,7 +2483,7 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, list->fb_layer_count = 0; list->xfb_enabled = false; @@ -20576,7 +27983,7 @@ index dcc7690876f..5495809fcb9 100644 list->is_predicated = false; list->current_framebuffer = VK_NULL_HANDLE; -@@ -2793,39 +2795,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des +@@ -2793,39 +2797,30 @@ static bool vk_write_descriptor_set_from_d3d12_desc(VkWriteDescriptorSet *vk_des /* We use separate bindings for buffer and texture SRVs/UAVs. * See d3d12_root_signature_init(). 
For unbounded ranges the * descriptors exist in two consecutive sets, otherwise they occur @@ -20632,7 +28039,7 @@ index dcc7690876f..5495809fcb9 100644 break; case VKD3D_DESCRIPTOR_MAGIC_SAMPLER: -@@ -3078,7 +3071,7 @@ done: +@@ -3078,7 +3073,7 @@ done: vkd3d_free(vk_descriptor_writes); } @@ -20641,7 +28048,7 @@ index dcc7690876f..5495809fcb9 100644 enum vkd3d_pipeline_bind_point bind_point) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; -@@ -3210,6 +3203,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) +@@ -3210,6 +3205,9 @@ static void command_list_flush_vk_heap_updates(struct d3d12_command_list *list) static void command_list_add_descriptor_heap(struct d3d12_command_list *list, struct d3d12_descriptor_heap *heap) { @@ -20651,7 +28058,7 @@ index dcc7690876f..5495809fcb9 100644 if (!contains_heap(list->descriptor_heaps, list->descriptor_heap_count, heap)) { if (list->descriptor_heap_count == ARRAY_SIZE(list->descriptor_heaps)) -@@ -3296,6 +3292,15 @@ static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list +@@ -3296,6 +3294,15 @@ static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); } @@ -20667,7 +28074,7 @@ index dcc7690876f..5495809fcb9 100644 static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); -@@ -3303,7 +3308,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l +@@ -3303,7 +3310,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false; @@ -20676,7 +28083,7 @@ index dcc7690876f..5495809fcb9 100644 return true; } -@@ -3320,7 +3325,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list +@@ -3320,7 +3327,7 @@ static bool 
d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false; @@ -20685,7 +28092,7 @@ index dcc7690876f..5495809fcb9 100644 if (list->current_render_pass != VK_NULL_HANDLE) return true; -@@ -3351,6 +3356,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list +@@ -3351,6 +3358,12 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list list->xfb_enabled = true; } @@ -20698,7 +28105,56 @@ index dcc7690876f..5495809fcb9 100644 return true; } -@@ -5939,7 +5950,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr +@@ -4791,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; + const struct vkd3d_vk_device_procs *vk_procs; + VkBuffer buffers[ARRAY_SIZE(list->strides)]; ++ struct d3d12_device *device = list->device; ++ unsigned int i, stride, max_view_count; + struct d3d12_resource *resource; + bool invalidate = false; +- unsigned int i, stride; + + TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); + +- vk_procs = &list->device->vk_procs; +- null_resources = &list->device->null_resources; +- gpu_va_allocator = &list->device->gpu_va_allocator; ++ vk_procs = &device->vk_procs; ++ null_resources = &device->null_resources; ++ gpu_va_allocator = &device->gpu_va_allocator; + + if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides))) + { +@@ -4807,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + return; + } + ++ max_view_count = device->vk_info.device_limits.maxVertexInputBindings; ++ if (start_slot < max_view_count) ++ max_view_count -= start_slot; ++ else ++ max_view_count = 0; ++ ++ /* Although simply skipping unsupported binding slots isn't especially ++ * likely to work well in the general case, 
applications sometimes ++ * explicitly set all 32 vertex buffer bindings slots supported by ++ * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is ++ * an example of such an application. */ ++ if (view_count > max_view_count) ++ { ++ for (i = max_view_count; i < view_count; ++i) ++ { ++ if (views && views[i].BufferLocation) ++ WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i); ++ } ++ view_count = max_view_count; ++ } ++ + for (i = 0; i < view_count; ++i) + { + if (views && views[i].BufferLocation) +@@ -5939,7 +5974,25 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, FLOAT min, FLOAT max) { @@ -20725,7 +28181,7 @@ index dcc7690876f..5495809fcb9 100644 } static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, -@@ -6189,8 +6218,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d +@@ -6189,8 +6242,6 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->allocator = allocator; @@ -20735,7 +28191,7 @@ index dcc7690876f..5495809fcb9 100644 if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 01841c89692..65339c7ba5d 100644 +index 01841c89692..e92373a36fa 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -102,6 +102,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = @@ -20746,8 +28202,306 @@ index 01841c89692..65339c7ba5d 100644 VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), +@@ -135,7 +136,8 @@ static HRESULT 
vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + }; + +- if (device->vk_info.EXT_mutable_descriptor_type && index && index != VKD3D_SET_INDEX_UAV_COUNTER ++ if (device->vk_info.EXT_mutable_descriptor_type ++ && index != VKD3D_SET_INDEX_MUTABLE && index != VKD3D_SET_INDEX_UAV_COUNTER + && device->vk_descriptor_heap_layouts[index].applicable_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + device->vk_descriptor_heap_layouts[index].vk_set_layout = VK_NULL_HANDLE; +@@ -143,7 +145,7 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic + } + + binding.binding = 0; +- binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && !index) ++ binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && index == VKD3D_SET_INDEX_MUTABLE) + ? VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[index].type; + binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; + binding.stageFlags = VK_SHADER_STAGE_ALL; +@@ -199,14 +201,20 @@ static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device + { + static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = + { +- {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, +- {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, +- {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, +- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, +- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, +- {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, +- /* UAV counters */ +- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, ++ [VKD3D_SET_INDEX_UNIFORM_BUFFER] = ++ {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, 
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, ++ [VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER] = ++ {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, ++ [VKD3D_SET_INDEX_SAMPLED_IMAGE] = ++ {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, ++ [VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER] = ++ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, ++ [VKD3D_SET_INDEX_STORAGE_IMAGE] = ++ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, ++ [VKD3D_SET_INDEX_SAMPLER] = ++ {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, ++ [VKD3D_SET_INDEX_UAV_COUNTER] = ++ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + }; + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + enum vkd3d_vk_descriptor_set_index set; +@@ -589,7 +597,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + } + if (!create_info->pfn_create_thread != !create_info->pfn_join_thread) + { +- ERR("Invalid create/join thread function pointers.\n"); ++ WARN("Invalid create/join thread function pointers.\n"); + return E_INVALIDARG; + } + if (create_info->wchar_size != 2 && create_info->wchar_size != 4) +@@ -607,7 +615,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + + if (FAILED(hr = vkd3d_init_vk_global_procs(instance, create_info->pfn_vkGetInstanceProcAddr))) + { +- ERR("Failed to initialise Vulkan global procs, hr %s.\n", debugstr_hresult(hr)); ++ WARN("Failed to initialise Vulkan global procs, hr %s.\n", debugstr_hresult(hr)); + return hr; + } + +@@ -689,7 +697,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + vkd3d_free(extensions); + if (vr < 0) + { +- ERR("Failed to create Vulkan instance, vr %d.\n", vr); ++ WARN("Failed to create Vulkan instance, vr %d.\n", vr); + if (instance->libvulkan) + 
vkd3d_dlclose(instance->libvulkan); + return hresult_from_vk_result(vr); +@@ -697,7 +705,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + + if (FAILED(hr = vkd3d_load_vk_instance_procs(&instance->vk_procs, vk_global_procs, vk_instance))) + { +- ERR("Failed to load instance procs, hr %s.\n", debugstr_hresult(hr)); ++ WARN("Failed to load instance procs, hr %s.\n", debugstr_hresult(hr)); + if (instance->vk_procs.vkDestroyInstance) + instance->vk_procs.vkDestroyInstance(vk_instance, NULL); + if (instance->libvulkan) +@@ -1572,6 +1580,111 @@ static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, + return S_OK; + } + ++static void vkd3d_override_caps(struct d3d12_device *device) ++{ ++ const char *caps_override, *p; ++ ++ static const struct override_value ++ { ++ const char *str; ++ uint32_t value; ++ } ++ feature_level_override_values[] = ++ { ++ {"11.0", D3D_FEATURE_LEVEL_11_0}, ++ {"11.1", D3D_FEATURE_LEVEL_11_1}, ++ {"12.0", D3D_FEATURE_LEVEL_12_0}, ++ {"12.1", D3D_FEATURE_LEVEL_12_1}, ++ {"12.2", D3D_FEATURE_LEVEL_12_2}, ++ }, ++ resource_binding_tier_override_values[] = ++ { ++ {"1", D3D12_RESOURCE_BINDING_TIER_1}, ++ {"2", D3D12_RESOURCE_BINDING_TIER_2}, ++ {"3", D3D12_RESOURCE_BINDING_TIER_3}, ++ }; ++ static const struct override_field ++ { ++ const char *name; ++ size_t offset; ++ const struct override_value *values; ++ size_t value_count; ++ } ++ override_fields[] = ++ { ++ { ++ "feature_level", ++ offsetof(struct d3d12_device, vk_info.max_feature_level), ++ feature_level_override_values, ++ ARRAY_SIZE(feature_level_override_values) ++ }, ++ { ++ "resource_binding_tier", ++ offsetof(struct d3d12_device, feature_options.ResourceBindingTier), ++ resource_binding_tier_override_values, ++ ARRAY_SIZE(resource_binding_tier_override_values) ++ }, ++ }; ++ ++ if (!(caps_override = getenv("VKD3D_CAPS_OVERRIDE"))) ++ return; ++ ++ p = caps_override; ++ for (;;) ++ { ++ size_t i; ++ ++ for (i = 0; i < 
ARRAY_SIZE(override_fields); ++i) ++ { ++ const struct override_field *field = &override_fields[i]; ++ size_t len = strlen(field->name); ++ ++ if (strncmp(p, field->name, len) == 0 && p[len] == '=') ++ { ++ size_t j; ++ ++ p += len + 1; ++ ++ for (j = 0; j < field->value_count; ++j) ++ { ++ const struct override_value *value = &field->values[j]; ++ size_t value_len = strlen(value->str); ++ ++ if (strncmp(p, value->str, value_len) == 0 ++ && (p[value_len] == '\0' || p[value_len] == ',')) ++ { ++ memcpy(&((uint8_t *)device)[field->offset], (uint8_t *)&value->value, sizeof(value->value)); ++ ++ p += value_len; ++ if (p[0] == '\0') ++ { ++ TRACE("Overriding caps with: %s\n", caps_override); ++ return; ++ } ++ p += 1; ++ ++ break; ++ } ++ } ++ ++ if (j == field->value_count) ++ { ++ WARN("Cannot parse the override caps string: %s\n", caps_override); ++ return; ++ } ++ ++ break; ++ } ++ } ++ ++ if (i == ARRAY_SIZE(override_fields)) ++ { ++ WARN("Cannot parse the override caps string: %s\n", caps_override); ++ return; ++ } ++ } ++} ++ + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + const struct vkd3d_device_create_info *create_info, + struct vkd3d_physical_device_info *physical_device_info, +@@ -1583,7 +1696,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; + VkPhysicalDevice physical_device = device->vk_physical_device; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; +- VkExtensionProperties *vk_extensions; ++ VkExtensionProperties *vk_extensions = NULL; + VkPhysicalDeviceFeatures *features; + uint32_t vk_extension_count; + HRESULT hr; +@@ -1741,6 +1854,9 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vulkan_info->EXT_shader_viewport_index_layer; + + vkd3d_init_feature_level(vulkan_info, features, &device->feature_options); ++ ++ vkd3d_override_caps(device); ++ + if (vulkan_info->max_feature_level < 
create_info->minimum_feature_level) + { + WARN("Feature level %#x is not supported.\n", create_info->minimum_feature_level); +@@ -1809,6 +1925,28 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + && descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind + && descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind; + ++ if (device->use_vk_heaps && device->vk_info.KHR_push_descriptor) ++ { ++ /* VKD3D_SET_INDEX_COUNT for the Vulkan heaps, one for the push ++ * descriptors set and one for the static samplers set. */ ++ unsigned int descriptor_set_count = VKD3D_SET_INDEX_COUNT + 2; ++ ++ /* A mutable descriptor set can replace all those that should otherwise ++ * back the SRV-UAV-CBV descriptor heap. */ ++ if (device->vk_info.EXT_mutable_descriptor_type) ++ descriptor_set_count -= VKD3D_SET_INDEX_COUNT - (VKD3D_SET_INDEX_MUTABLE + 1); ++ ++ /* For many Vulkan implementations maxBoundDescriptorSets == 8; also, ++ * if mutable descriptors are not available the descriptor set count ++ * will be 9; so saving a descriptor set is going to be often ++ * significant. */ ++ if (descriptor_set_count > device->vk_info.device_limits.maxBoundDescriptorSets) ++ { ++ WARN("Disabling VK_KHR_push_descriptor to save a descriptor set.\n"); ++ device->vk_info.KHR_push_descriptor = VK_FALSE; ++ } ++ } ++ + if (device->use_vk_heaps) + vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->descriptor_indexing_properties); +@@ -1816,6 +1954,13 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits); + ++ TRACE("Device %p: using %s descriptor heaps, with%s descriptor indexing, " ++ "with%s push descriptors, with%s mutable descriptors\n", ++ device, device->use_vk_heaps ? "Vulkan" : "virtual", ++ device->vk_info.EXT_descriptor_indexing ? 
"" : "out", ++ device->vk_info.KHR_push_descriptor ? "" : "out", ++ device->vk_info.EXT_mutable_descriptor_type ? "" : "out"); ++ + vkd3d_chain_physical_device_info_structures(physical_device_info, device); + + return S_OK; +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index 6d6820d3752..1f7d90eb95f 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -2498,7 +2498,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea + enum vkd3d_vk_descriptor_set_index set, end; + unsigned int i = writes->count; + +- end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_UNIFORM_BUFFER ++ end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_MUTABLE + : VKD3D_SET_INDEX_STORAGE_IMAGE; + /* Binding a shader with the wrong null descriptor type works in Windows. + * To support that here we must write one to all applicable Vulkan sets. */ +@@ -4250,7 +4250,8 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descrip + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type + && device->vk_descriptor_heap_layouts[set].vk_set_layout) + { +- pool_sizes[pool_desc.poolSizeCount].type = (device->vk_info.EXT_mutable_descriptor_type && !set) ++ pool_sizes[pool_desc.poolSizeCount].type = ++ (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE) + ? VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[set].type; + pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; + } +@@ -4280,11 +4281,12 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript + + if (!device->vk_descriptor_heap_layouts[set].vk_set_layout) + { +- /* Set 0 uses mutable descriptors, and this set is unused. 
*/ +- if (!descriptor_heap->vk_descriptor_sets[0].vk_set +- && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0))) ++ /* Mutable descriptors are in use, and this set is unused. */ ++ if (!descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set ++ && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, ++ device, VKD3D_SET_INDEX_MUTABLE))) + return hr; +- descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[0].vk_set; ++ descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set; + descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; + return S_OK; + } diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 682d488faa8..8e5ec70a577 100644 +index 682d488faa8..2b0f81d3812 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -219,6 +219,30 @@ static VkShaderStageFlags stage_flags_from_visibility(D3D12_SHADER_VISIBILITY vi @@ -20997,13 +28751,10 @@ index 682d488faa8..8e5ec70a577 100644 if (context->unbounded_offset != UINT_MAX) - return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); - +- - return S_OK; -} -+ if (binding_idx) -+ *binding_idx = idx; - +- -static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, - unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, @@ -21017,7 +28768,8 @@ index 682d488faa8..8e5ec70a577 100644 - duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV - || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - && duplicate_descriptors; -- ++ d3d12_root_signature_append_vk_binding_array(root_signature, 0, context); + - *first_binding = 
context->descriptor_binding; - for (i = 0; i < binding_count; ++i) - { @@ -21025,7 +28777,9 @@ index 682d488faa8..8e5ec70a577 100644 - && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, - register_space, base_register_idx + i, true, shader_visibility, 1, context))) - return hr; -- ++ if (binding_idx) ++ *binding_idx = idx; + - if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, - base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) - return hr; @@ -21092,6 +28846,15 @@ index 682d488faa8..8e5ec70a577 100644 context->unbounded_offset = UINT_MAX; +@@ -998,7 +1016,7 @@ static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d + } + else + { +- binding->set = 0; ++ binding->set = VKD3D_SET_INDEX_MUTABLE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + } + } @@ -1107,18 +1125,19 @@ static int compare_descriptor_range(const void *a, const void *b) if ((ret = vkd3d_u32_compare(range_a->offset, range_b->offset))) return ret; @@ -21260,7 +29023,7 @@ index 682d488faa8..8e5ec70a577 100644 return S_OK; } -@@ -1477,6 +1450,34 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, +@@ -1477,26 +1450,57 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, return S_OK; } @@ -21295,7 +29058,38 @@ index 682d488faa8..8e5ec70a577 100644 static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, VkDescriptorSetLayout *vk_set_layouts) { -@@ -1508,7 +1509,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa + const struct d3d12_device *device = root_signature->device; + enum vkd3d_vk_descriptor_set_index set; ++ VkDescriptorSetLayout vk_set_layout; + unsigned int i; + + for (i = 0; i < root_signature->vk_set_count; ++i) + vk_set_layouts[i] = 
root_signature->descriptor_set_layouts[i].vk_layout; + +- if (device->use_vk_heaps) ++ if (!device->use_vk_heaps) ++ return i; ++ ++ for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { +- VkDescriptorSetLayout mutable_layout = device->vk_descriptor_heap_layouts[0].vk_set_layout; ++ vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout; + +- for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) +- { +- VkDescriptorSetLayout vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout; +- /* All layouts must be valid, so if null, just set it to the mutable one. */ +- vk_set_layouts[i++] = vk_set_layout ? vk_set_layout : mutable_layout; +- } ++ VKD3D_ASSERT(vk_set_layout); ++ vk_set_layouts[i++] = vk_set_layout; ++ ++ if (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE) ++ break; + } + + return i; +@@ -1508,7 +1512,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa VkDescriptorSetLayout vk_layouts[VKD3D_MAX_DESCRIPTOR_SETS]; const struct vkd3d_vulkan_info *vk_info = &device->vk_info; struct vkd3d_descriptor_set_context context; @@ -21303,7 +29097,7 @@ index 682d488faa8..8e5ec70a577 100644 struct d3d12_root_signature_info info; bool use_vk_heaps; unsigned int i; -@@ -1516,7 +1516,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa +@@ -1516,7 +1519,6 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa memset(&context, 0, sizeof(context)); context.unbounded_offset = UINT_MAX; @@ -21311,7 +29105,7 @@ index 682d488faa8..8e5ec70a577 100644 root_signature->ID3D12RootSignature_iface.lpVtbl = &d3d12_root_signature_vtbl; root_signature->refcount = 1; -@@ -1578,20 +1577,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa +@@ -1578,20 +1580,14 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa 
sizeof(*root_signature->static_samplers)))) goto fail; @@ -21334,7 +29128,7 @@ index 682d488faa8..8e5ec70a577 100644 } root_signature->main_set = root_signature->vk_set_count; -@@ -1607,11 +1600,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa +@@ -1607,11 +1603,10 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa if (use_vk_heaps) d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); @@ -21348,7 +29142,7 @@ index 682d488faa8..8e5ec70a577 100644 i = d3d12_root_signature_copy_descriptor_set_layouts(root_signature, vk_layouts); if (FAILED(hr = vkd3d_create_pipeline_layout(device, i, -@@ -1627,7 +1619,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa +@@ -1627,7 +1622,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa return S_OK; fail: @@ -21357,7 +29151,25 @@ index 682d488faa8..8e5ec70a577 100644 d3d12_root_signature_cleanup(root_signature, device); return hr; } -@@ -3867,6 +3859,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta +@@ -2286,7 +2281,7 @@ static HRESULT create_shader_stage(struct d3d12_device *device, + + const struct vkd3d_shader_compile_option options[] = + { +- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + {VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, + {VKD3D_SHADER_COMPILE_OPTION_WRITE_TESS_GEOM_POINT_SIZE, 0}, + {VKD3D_SHADER_COMPILE_OPTION_FEATURE, feature_flags_compile_option(device)}, +@@ -2341,7 +2336,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER + + const struct vkd3d_shader_compile_option options[] = + { +- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + 
{VKD3D_SHADER_COMPILE_OPTION_TYPED_UAV, typed_uav_compile_option(device)}, + }; + +@@ -3867,6 +3862,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta VK_DYNAMIC_STATE_SCISSOR, VK_DYNAMIC_STATE_BLEND_CONSTANTS, VK_DYNAMIC_STATE_STENCIL_REFERENCE, @@ -21365,6 +29177,15 @@ index 682d488faa8..8e5ec70a577 100644 }; static const VkPipelineDynamicStateCreateInfo dynamic_desc = { +@@ -4019,7 +4015,7 @@ static int compile_hlsl_cs(const struct vkd3d_shader_code *hlsl, struct vkd3d_sh + + static const struct vkd3d_shader_compile_option options[] = + { +- {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, ++ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_14}, + }; + + info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index 831dc07af56..839bb173854 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c @@ -21399,7 +29220,7 @@ index 9eccec111c7..5215cf8ef86 100644 } diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index ba4e2e8488d..e6d477a5c12 100644 +index ba4e2e8488d..97a99782d6a 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -131,6 +131,7 @@ struct vkd3d_vulkan_info @@ -21410,7 +29231,37 @@ index ba4e2e8488d..e6d477a5c12 100644 bool EXT_depth_clip_enable; bool EXT_descriptor_indexing; bool EXT_fragment_shader_interlock; -@@ -1254,7 +1255,7 @@ struct d3d12_command_list +@@ -771,14 +772,21 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev + + enum vkd3d_vk_descriptor_set_index + { +- VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, +- VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, +- VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, +- VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, +- VKD3D_SET_INDEX_STORAGE_IMAGE = 4, +- VKD3D_SET_INDEX_SAMPLER = 5, +- VKD3D_SET_INDEX_UAV_COUNTER = 6, +- VKD3D_SET_INDEX_COUNT = 7 ++ VKD3D_SET_INDEX_SAMPLER, 
++ VKD3D_SET_INDEX_UAV_COUNTER, ++ VKD3D_SET_INDEX_MUTABLE, ++ ++ /* These are used when mutable descriptors are not available to back ++ * SRV-UAV-CBV descriptor heaps. They must stay at the end of this ++ * enumeration, so that they can be ignored when mutable descriptors are ++ * used. */ ++ VKD3D_SET_INDEX_UNIFORM_BUFFER = VKD3D_SET_INDEX_MUTABLE, ++ VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, ++ VKD3D_SET_INDEX_SAMPLED_IMAGE, ++ VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, ++ VKD3D_SET_INDEX_STORAGE_IMAGE, ++ ++ VKD3D_SET_INDEX_COUNT + }; + + extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; +@@ -1254,7 +1262,7 @@ struct d3d12_command_list VkFormat dsv_format; bool xfb_enabled; @@ -21419,7 +29270,7 @@ index ba4e2e8488d..e6d477a5c12 100644 bool is_predicated; VkFramebuffer current_framebuffer; -@@ -1271,7 +1272,6 @@ struct d3d12_command_list +@@ -1271,7 +1279,6 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch deleted file mode 100644 index e055f6e1..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch +++ /dev/null @@ -1,2560 +0,0 @@ -From 3efea156cd1704556ea6e97b7de64c36232ac5ef Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 23 Oct 2024 13:50:07 +1100 -Subject: [PATCH] Updated vkd3d to 4889c71857ce2152a9c9e014b9f5831f96dc349b. 
- ---- - libs/vkd3d/include/vkd3d_shader.h | 33 ++ - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 7 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 103 ++++- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 155 ++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 11 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 2 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 48 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 400 ++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/ir.c | 233 +++++++++- - libs/vkd3d/libs/vkd3d-shader/msl.c | 7 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 25 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 119 ++++-- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 19 +- - libs/vkd3d/libs/vkd3d/command.c | 2 + - libs/vkd3d/libs/vkd3d/device.c | 135 ++++++ - 18 files changed, 1121 insertions(+), 190 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 5c0d13ea9e2..1476387c6bd 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -700,6 +700,39 @@ enum vkd3d_shader_parameter_name - * \since 1.14 - */ - VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE_MAX, -+ /** -+ * Whether texture coordinate inputs should take their values from the -+ * point coordinate. -+ * -+ * When this parameter is provided to a pixel shader, and the value is -+ * nonzero, any fragment shader input with the semantic name "TEXCOORD" -+ * takes its value from the point coordinates instead of from the previous -+ * shader. The point coordinates here are defined as a four-component vector -+ * whose X and Y components are the X and Y coordinates of the fragment -+ * within a point being rasterized, and whose Z and W components are zero. -+ * -+ * In GLSL, the X and Y components are drawn from gl_PointCoord; in SPIR-V, -+ * they are drawn from a variable with the BuiltinPointCoord decoration. 
-+ * -+ * This includes t# fragment shader inputs in shader model 2 shaders, -+ * as well as texture sampling in shader model 1 shaders. -+ * -+ * This parameter can be used to implement fixed function point sprite, as -+ * present in Direct3D versions 8 and 9, if the target environment does not -+ * support point sprite as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * The default value is zero, i.e. use the original varyings. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.14 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 38d566d9fe0..bc28aebed4d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -393,14 +393,13 @@ static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type regi - } - } - --static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, -- enum vkd3d_shader_global_flags global_flags) -+static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler, enum vsir_global_flags global_flags) - { - unsigned int i; - - static const struct - { -- enum vkd3d_shader_global_flags flag; -+ enum vsir_global_flags flag; - const char *name; - } - global_flag_info[] = -@@ -1190,6 +1189,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); - break; - -+ case VKD3DSPR_POINT_COORD: -+ vkd3d_string_buffer_printf(buffer, "vPointCoord"); -+ break; -+ - default: - vkd3d_string_buffer_printf(buffer, "%s%s", - compiler->colours.error, reg->type, compiler->colours.reset); -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index ae8e864c179..bbebf86e6d5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1320,7 +1320,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - - /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, compile_info, &version, -- code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, false)) -+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index f9f44f34bcf..570af5eca5a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -9564,7 +9564,7 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons - - static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm6_metadata_value *m) - { -- enum vkd3d_shader_global_flags global_flags, mask, rotated_flags; -+ enum vsir_global_flags global_flags, mask, rotated_flags; - struct vkd3d_shader_instruction *ins; - - if (!sm6_metadata_get_uint64_value(sm6, m, (uint64_t*)&global_flags)) -@@ -9574,7 +9574,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm - "Global flags metadata value is not an integer."); - return; - } -- /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vkd3d_shader_global_flags. */ -+ /* Rotate SKIP_OPTIMIZATION from bit 0 to bit 4 to match vsir_global_flags. 
*/ - mask = (VKD3DSGF_SKIP_OPTIMIZATION << 1) - 1; - rotated_flags = global_flags & mask; - rotated_flags = (rotated_flags >> 1) | ((rotated_flags & 1) << 4); -@@ -9582,6 +9582,7 @@ static void sm6_parser_emit_global_flags(struct sm6_parser *sm6, const struct sm - - ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_GLOBAL_FLAGS); - ins->declaration.global_flags = global_flags; -+ sm6->p.program->global_flags = global_flags; - } - - static enum vkd3d_result sm6_parser_emit_thread_group(struct sm6_parser *sm6, const struct sm6_metadata_value *m) -@@ -10312,7 +10313,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - /* Estimate instruction count to avoid reallocation in most shaders. */ - count = max(token_count, 400) - 400; - if (!vsir_program_init(program, compile_info, &version, -- (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, true)) -+ (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS, VSIR_FULLY_NORMALISED_IO)) - return VKD3D_ERROR_OUT_OF_MEMORY; - vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); - sm6->ptr = &sm6->start[1]; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index f3f7a2c765c..d901f08d50d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -2854,6 +2854,7 @@ struct fx_parser - struct vkd3d_shader_message_context *message_context; - struct vkd3d_string_buffer buffer; - unsigned int indent; -+ unsigned int version; - struct - { - const uint8_t *ptr; -@@ -2862,6 +2863,7 @@ struct fx_parser - } unstructured; - uint32_t buffer_count; - uint32_t object_count; -+ uint32_t group_count; - bool failed; - }; - -@@ -3085,7 +3087,6 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) - vkd3d_string_buffer_printf(&parser->buffer, ">"); - } - -- - static void fx_parse_fx_4_numeric_variables(struct fx_parser *parser, uint32_t count) - { - struct fx_4_numeric_variable -@@ -3212,6 +3213,97 @@ 
static void fx_4_parse_objects(struct fx_parser *parser) - } - } - -+static void fx_parse_fx_4_technique(struct fx_parser *parser) -+{ -+ struct fx_technique -+ { -+ uint32_t name; -+ uint32_t count; -+ } technique; -+ struct fx_pass -+ { -+ uint32_t name; -+ uint32_t count; -+ } pass; -+ const char *name; -+ uint32_t i; -+ -+ if (parser->failed) -+ return; -+ -+ fx_parser_read_u32s(parser, &technique, sizeof(technique)); -+ -+ name = fx_4_get_string(parser, technique.name); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "technique%u %s", parser->version, name); -+ fx_parse_fx_4_annotations(parser); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -+ -+ parse_fx_start_indent(parser); -+ for (i = 0; i < technique.count; ++i) -+ { -+ fx_parser_read_u32s(parser, &pass, sizeof(pass)); -+ name = fx_4_get_string(parser, pass.name); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "pass %s", name); -+ fx_parse_fx_4_annotations(parser); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n"); -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "{\n"); -+ -+ if (pass.count) -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Parsing pass states is not implemented.\n"); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ } -+ -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+} -+ -+static void fx_parse_groups(struct fx_parser *parser) -+{ -+ struct fx_group -+ { -+ uint32_t name; -+ uint32_t count; -+ } group; -+ const char *name; -+ uint32_t i, j; -+ -+ if (parser->failed) -+ return; -+ -+ for (i = 0; i < parser->group_count; ++i) -+ { -+ fx_parser_read_u32s(parser, &group, sizeof(group)); -+ -+ name = fx_4_get_string(parser, group.name); -+ 
-+ vkd3d_string_buffer_printf(&parser->buffer, "fxgroup %s", name); -+ fx_parse_fx_4_annotations(parser); -+ -+ vkd3d_string_buffer_printf(&parser->buffer, "\n{\n"); -+ parse_fx_start_indent(parser); -+ -+ for (j = 0; j < group.count; ++j) -+ fx_parse_fx_4_technique(parser); -+ -+ parse_fx_end_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -+ } -+} -+ - static int fx_4_parse(struct fx_parser *parser) - { - struct fx_4_header -@@ -3236,7 +3328,9 @@ static int fx_4_parse(struct fx_parser *parser) - uint32_t shader_count; - uint32_t inline_shader_count; - } header; -+ uint32_t i; - -+ parser->version = 10; - fx_parser_read_u32s(parser, &header, sizeof(header)); - parser->buffer_count = header.buffer_count; - parser->object_count = header.object_count; -@@ -3255,6 +3349,9 @@ static int fx_4_parse(struct fx_parser *parser) - fx_parse_buffers(parser); - fx_4_parse_objects(parser); - -+ for (i = 0; i < header.technique_count; ++i) -+ fx_parse_fx_4_technique(parser); -+ - return parser->failed ? - 1 : 0; - } - -@@ -3288,9 +3385,11 @@ static int fx_5_parse(struct fx_parser *parser) - uint32_t class_instance_element_count; - } header; - -+ parser->version = 11; - fx_parser_read_u32s(parser, &header, sizeof(header)); - parser->buffer_count = header.buffer_count; - parser->object_count = header.object_count; -+ parser->group_count = header.group_count; - - if (parser->end - parser->ptr < header.unstructured_size) - { -@@ -3306,6 +3405,8 @@ static int fx_5_parse(struct fx_parser *parser) - fx_parse_buffers(parser); - fx_4_parse_objects(parser); - -+ fx_parse_groups(parser); -+ - return parser->failed ? 
- 1 : 0; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 46515818d07..a2a090e1c21 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -274,6 +274,10 @@ static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, - gen->prefix, reg->idx[0].offset, reg->idx[2].offset); - break; - -+ case VKD3DSPR_THREADID: -+ vkd3d_string_buffer_printf(buffer, "gl_GlobalInvocationID"); -+ break; -+ - case VKD3DSPR_IDXTEMP: - vkd3d_string_buffer_printf(buffer, "x%u", reg->idx[0].offset); - shader_glsl_print_subscript(buffer, gen, reg->idx[1].rel_addr, reg->idx[1].offset); -@@ -385,7 +389,7 @@ static void shader_glsl_print_src(struct vkd3d_string_buffer *buffer, struct vkd - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled 'non-uniform' modifier."); - -- if (reg->type == VKD3DSPR_IMMCONST) -+ if (reg->type == VKD3DSPR_IMMCONST || reg->type == VKD3DSPR_THREADID) - src_data_type = VKD3D_DATA_UINT; - else - src_data_type = VKD3D_DATA_FLOAT; -@@ -825,6 +829,37 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ - glsl_dst_cleanup(&dst, &gen->string_buffers); - } - -+static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *coord, const struct vkd3d_shader_src_param *ref, unsigned int coord_size) -+{ -+ uint32_t coord_mask = vkd3d_write_mask_from_component_count(coord_size); -+ -+ switch (coord_size) -+ { -+ case 1: -+ vkd3d_string_buffer_printf(buffer, "vec3("); -+ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ", 0.0, "); -+ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ")"); -+ break; -+ -+ case 4: -+ shader_glsl_print_src(buffer, gen, coord, coord_mask, 
coord->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ", "); -+ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); -+ break; -+ -+ default: -+ vkd3d_string_buffer_printf(buffer, "vec%u(", coord_size + 1); -+ shader_glsl_print_src(buffer, gen, coord, coord_mask, coord->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ", "); -+ shader_glsl_print_src(buffer, gen, ref, VKD3DSP_WRITEMASK_0, ref->reg.data_type); -+ vkd3d_string_buffer_printf(buffer, ")"); -+ break; -+ } -+} -+ - static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - const struct glsl_resource_type_info *resource_type_info; -@@ -835,9 +870,9 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - enum vkd3d_shader_resource_type resource_type; - struct vkd3d_string_buffer *sample; - enum vkd3d_data_type data_type; -- struct glsl_src coord; -+ unsigned int coord_size; - struct glsl_dst dst; -- uint32_t coord_mask; -+ bool shadow; - - if (vkd3d_shader_instruction_has_texel_offset(ins)) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -@@ -868,13 +903,13 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - - if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) - { -- coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); -+ coord_size = resource_type_info->coord_size; - } - else - { - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled resource type %#x.", resource_type); -- coord_mask = vkd3d_write_mask_from_component_count(2); -+ coord_size = 2; - } - - sampler_id = ins->src[2].reg.idx[0].offset; -@@ -882,6 +917,20 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) - { - sampler_space = 
d->register_space; -+ shadow = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; -+ -+ if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ { -+ if (!shadow) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); -+ } -+ else -+ { -+ if (shadow) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); -+ } - } - else - { -@@ -891,17 +940,94 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - } - - glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -- glsl_src_init(&coord, gen, &ins->src[0], coord_mask); - sample = vkd3d_string_buffer_get(&gen->string_buffers); - -- vkd3d_string_buffer_printf(sample, "texture("); -+ if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ vkd3d_string_buffer_printf(sample, "textureLod("); -+ else -+ vkd3d_string_buffer_printf(sample, "texture("); - shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); -- vkd3d_string_buffer_printf(sample, ", %s)", coord.str->buffer); -+ vkd3d_string_buffer_printf(sample, ", "); -+ if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size); -+ else -+ shader_glsl_print_src(sample, gen, &ins->src[0], -+ vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type); -+ if (ins->opcode == VKD3DSIH_SAMPLE_B) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); -+ } -+ else if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ { -+ vkd3d_string_buffer_printf(sample, ", 0.0"); -+ } -+ vkd3d_string_buffer_printf(sample, ")"); - shader_glsl_print_swizzle(sample, ins->src[1].swizzle, 
ins->dst[0].write_mask); - - shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); - - vkd3d_string_buffer_release(&gen->string_buffers, sample); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_load_uav_typed(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct glsl_resource_type_info *resource_type_info; -+ enum vkd3d_shader_component_type component_type; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ enum vkd3d_shader_resource_type resource_type; -+ unsigned int uav_id, uav_idx, uav_space; -+ struct vkd3d_string_buffer *load; -+ struct glsl_src coord; -+ struct glsl_dst dst; -+ uint32_t coord_mask; -+ -+ if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -+ "Descriptor indexing is not supported."); -+ -+ uav_id = ins->src[1].reg.idx[0].offset; -+ uav_idx = ins->src[1].reg.idx[1].offset; -+ if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, uav_id))) -+ { -+ resource_type = d->resource_type; -+ uav_space = d->register_space; -+ component_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Undeclared UAV descriptor %u.", uav_id); -+ uav_space = 0; -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) -+ { -+ coord_mask = vkd3d_write_mask_from_component_count(resource_type_info->coord_size); -+ } -+ else -+ { -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled UAV type %#x.", resource_type); -+ coord_mask = vkd3d_write_mask_from_component_count(2); -+ } -+ -+ glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ 
glsl_src_init(&coord, gen, &ins->src[0], coord_mask); -+ load = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ vkd3d_string_buffer_printf(load, "imageLoad("); -+ shader_glsl_print_image_name(load, gen, uav_idx, uav_space); -+ vkd3d_string_buffer_printf(load, ", %s)", coord.str->buffer); -+ shader_glsl_print_swizzle(load, ins->src[1].swizzle, ins->dst[0].write_mask); -+ -+ shader_glsl_print_assignment_ext(gen, &dst, -+ vkd3d_data_type_from_component_type(component_type), "%s", load->buffer); -+ -+ vkd3d_string_buffer_release(&gen->string_buffers, load); - glsl_src_cleanup(&coord, &gen->string_buffers); - glsl_dst_cleanup(&dst, &gen->string_buffers); - } -@@ -1385,6 +1511,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_LD: - shader_glsl_ld(gen, ins); - break; -+ case VKD3DSIH_LD_UAV_TYPED: -+ shader_glsl_load_uav_typed(gen, ins); -+ break; - case VKD3DSIH_LOG: - shader_glsl_intrinsic(gen, ins, "log2"); - break; -@@ -1425,6 +1554,9 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - shader_glsl_intrinsic(gen, ins, "inversesqrt"); - break; - case VKD3DSIH_SAMPLE: -+ case VKD3DSIH_SAMPLE_B: -+ case VKD3DSIH_SAMPLE_C: -+ case VKD3DSIH_SAMPLE_C_LZ: - shader_glsl_sample(gen, ins); - break; - case VKD3DSIH_SQRT: -@@ -2078,6 +2210,10 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) - group_size->x, group_size->y, group_size->z); - } - -+ if (program->global_flags) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)program->global_flags); -+ - shader_glsl_generate_descriptor_declarations(gen); - shader_glsl_generate_input_declarations(gen); - shader_glsl_generate_output_declarations(gen); -@@ -2213,8 +2349,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) 
< 0) - return ret; - -- VKD3D_ASSERT(program->normalised_io); -- VKD3D_ASSERT(program->normalised_hull_cp_io); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); - - vkd3d_glsl_generator_init(&generator, program, compile_info, - descriptor_info, combined_sampler_info, message_context); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 6ad0117fd5c..c7aa148ea11 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -3164,6 +3164,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", - [HLSL_OP1_F16TOF32] = "f16tof32", -+ [HLSL_OP1_F32TOF16] = "f32tof16", - [HLSL_OP1_FLOOR] = "floor", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index efe3aec024b..e234cd0ba40 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -410,10 +410,12 @@ struct hlsl_attribute - #define HLSL_MODIFIER_SINGLE 0x00020000 - #define HLSL_MODIFIER_EXPORT 0x00040000 - #define HLSL_STORAGE_ANNOTATION 0x00080000 -+#define HLSL_MODIFIER_UNORM 0x00100000 -+#define HLSL_MODIFIER_SNORM 0x00200000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -- HLSL_MODIFIER_COLUMN_MAJOR) -+ HLSL_MODIFIER_COLUMN_MAJOR | HLSL_MODIFIER_UNORM | HLSL_MODIFIER_SNORM) - - #define HLSL_INTERPOLATION_MODIFIERS_MASK (HLSL_STORAGE_NOINTERPOLATION | HLSL_STORAGE_CENTROID | \ - HLSL_STORAGE_NOPERSPECTIVE | HLSL_STORAGE_LINEAR) -@@ -514,6 +516,9 @@ struct hlsl_ir_var - - /* Whether the shader performs dereferences with non-constant offsets in the variable. 
*/ - bool indexable; -+ /* Whether this is a semantic variable that was split from an array, or is the first -+ * element of a struct, and thus needs to be aligned when packed in the signature. */ -+ bool force_align; - - uint32_t is_input_semantic : 1; - uint32_t is_output_semantic : 1; -@@ -688,6 +693,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, - HLSL_OP1_F16TOF32, -+ HLSL_OP1_F32TOF16, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, - HLSL_OP1_LOG2, -@@ -1634,6 +1640,9 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - -+enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, -+ unsigned int storage_modifiers); -+ - struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ctx, const char *name, const char *hlsl); - - int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hlsl); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 97d8b13772b..ca983fc5ffd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -142,6 +142,7 @@ samplerCUBE {return KW_SAMPLERCUBE; } - SamplerState {return KW_SAMPLER; } - sampler_state {return KW_SAMPLER_STATE; } - shared {return KW_SHARED; } -+snorm {return KW_SNORM; } - stateblock {return KW_STATEBLOCK; } - stateblock_state {return KW_STATEBLOCK_STATE; } - static {return KW_STATIC; } -@@ -171,6 +172,7 @@ true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } - unsigned {return KW_UNSIGNED; } - uniform {return KW_UNIFORM; } -+unorm {return KW_UNORM; } - vector {return KW_VECTOR; } - VertexShader {return KW_VERTEXSHADER; } - vertexshader {return KW_VERTEXSHADER; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 213cec79c3d..49cff4c81b8 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -4024,6 +4024,21 @@ static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, - return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); - } - -+static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *type; -+ -+ if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) -+ return false; -+ -+ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); -+} -+ - static bool intrinsic_floor(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -5199,6 +5214,7 @@ intrinsic_functions[] = - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, - {"f16tof32", 1, true, intrinsic_f16tof32}, -+ {"f32tof16", 1, true, intrinsic_f32tof16}, - {"faceforward", 3, true, intrinsic_faceforward}, - {"floor", 1, true, intrinsic_floor}, - {"fmod", 2, true, intrinsic_fmod}, -@@ -6479,6 +6495,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_SAMPLER_STATE - %token KW_SAMPLERCOMPARISONSTATE - %token KW_SHARED -+%token KW_SNORM - %token KW_STATEBLOCK - %token KW_STATEBLOCK_STATE - %token KW_STATIC -@@ -6503,6 +6520,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_TYPEDEF - %token KW_UNSIGNED - %token KW_UNIFORM -+%token KW_UNORM - %token KW_VECTOR - %token KW_VERTEXSHADER - %token KW_VOID -@@ -6642,6 +6660,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %type type - %type type_no_void - %type typedef_type -+%type resource_format - - %type state_block_list - %type type_spec -@@ -7637,6 
+7656,15 @@ rov_type: - $$ = HLSL_SAMPLER_DIM_3D; - } - -+resource_format: -+ var_modifiers type -+ { -+ uint32_t modifiers = $1; -+ -+ if (!($$ = apply_type_modifiers(ctx, $2, &modifiers, false, &@1))) -+ YYABORT; -+ } -+ - type_no_void: - KW_VECTOR '<' type ',' C_INTEGER '>' - { -@@ -7730,18 +7758,18 @@ type_no_void: - { - $$ = hlsl_new_texture_type(ctx, $1, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0); - } -- | texture_type '<' type '>' -+ | texture_type '<' resource_format '>' - { - validate_texture_format_type(ctx, $3, &@3); - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } -- | texture_ms_type '<' type '>' -+ | texture_ms_type '<' resource_format '>' - { - validate_texture_format_type(ctx, $3, &@3); - - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } -- | texture_ms_type '<' type ',' shift_expr '>' -+ | texture_ms_type '<' resource_format ',' shift_expr '>' - { - unsigned int sample_count; - struct hlsl_block block; -@@ -7757,14 +7785,14 @@ type_no_void: - - $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); - } -- | uav_type '<' type '>' -+ | uav_type '<' resource_format '>' - { - validate_uav_type(ctx, $1, $3, &@3); - $$ = hlsl_new_uav_type(ctx, $1, $3, false); - } -- | rov_type '<' type '>' -+ | rov_type '<' resource_format '>' - { -- validate_uav_type(ctx, $1, $3, &@3); -+ validate_uav_type(ctx, $1, $3, &@4); - $$ = hlsl_new_uav_type(ctx, $1, $3, true); - } - | KW_STRING -@@ -8314,6 +8342,14 @@ var_modifiers: - { - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_EXPORT, &@1); - } -+ | KW_UNORM var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_UNORM, &@1); -+ } -+ | KW_SNORM var_modifiers -+ { -+ $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SNORM, &@1); -+ } - | var_identifier var_modifiers - { - $$ = $2; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 88bec8610cb..6e1b2b437b0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -278,7 +278,7 @@ static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hls - - static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, - struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic, -- uint32_t index, bool output, const struct vkd3d_shader_location *loc) -+ uint32_t index, bool output, bool force_align, const struct vkd3d_shader_location *loc) - { - struct hlsl_semantic new_semantic; - struct hlsl_ir_var *ext_var; -@@ -338,14 +338,32 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - else - ext_var->is_input_semantic = 1; - ext_var->is_param = var->is_param; -+ ext_var->force_align = force_align; - list_add_before(&var->scope_entry, &ext_var->scope_entry); - list_add_tail(&func->extern_vars, &ext_var->extern_entry); - - return ext_var; - } - -+static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t field_modifiers) -+{ -+ field_modifiers |= modifiers; -+ -+ /* TODO: 'sample' modifier is not supported yet. */ -+ -+ /* 'nointerpolation' always takes precedence, next the same is done for -+ * 'sample', remaining modifiers are combined. 
*/ -+ if (field_modifiers & HLSL_STORAGE_NOINTERPOLATION) -+ { -+ field_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; -+ field_modifiers |= HLSL_STORAGE_NOINTERPOLATION; -+ } -+ -+ return field_modifiers; -+} -+ - static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+ uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst; - struct vkd3d_shader_location *loc = &lhs->node.loc; -@@ -369,14 +387,17 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) - vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4); - -+ if (hlsl_type_major_size(type) > 1) -+ force_align = true; -+ - for (i = 0; i < hlsl_type_major_size(type); ++i) - { - struct hlsl_ir_node *store, *cast; - struct hlsl_ir_var *input; - struct hlsl_ir_load *load; - -- if (!(input = add_semantic_var(ctx, func, var, vector_type_src, modifiers, semantic, -- semantic_index + i, false, loc))) -+ if (!(input = add_semantic_var(ctx, func, var, vector_type_src, -+ modifiers, semantic, semantic_index + i, false, force_align, loc))) - return; - - if (!(load = hlsl_new_var_load(ctx, input, &var->loc))) -@@ -408,8 +429,9 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec - } - } - --static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -- struct hlsl_ir_load *lhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *lhs, uint32_t modifiers, -+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool 
force_align) - { - struct vkd3d_shader_location *loc = &lhs->node.loc; - struct hlsl_type *type = lhs->node.data_type; -@@ -425,12 +447,14 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - - for (i = 0; i < hlsl_type_element_count(type); ++i) - { -- uint32_t element_modifiers = modifiers; -+ uint32_t element_modifiers; - - if (type->class == HLSL_CLASS_ARRAY) - { - elem_semantic_index = semantic_index - + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; -+ element_modifiers = modifiers; -+ force_align = true; - } - else - { -@@ -444,17 +468,8 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - semantic = &field->semantic; - elem_semantic_index = semantic->index; - loc = &field->loc; -- element_modifiers |= field->storage_modifiers; -- -- /* TODO: 'sample' modifier is not supported yet */ -- -- /* 'nointerpolation' always takes precedence, next the same is done for 'sample', -- remaining modifiers are combined. 
*/ -- if (element_modifiers & HLSL_STORAGE_NOINTERPOLATION) -- { -- element_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK; -- element_modifiers |= HLSL_STORAGE_NOINTERPOLATION; -- } -+ element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers); -+ force_align = (i == 0); - } - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -@@ -466,12 +481,13 @@ static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - return; - list_add_after(&c->entry, &element_load->node.entry); - -- prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, semantic, elem_semantic_index); -+ prepend_input_copy_recurse(ctx, func, element_load, element_modifiers, -+ semantic, elem_semantic_index, force_align); - } - } - else - { -- prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index); -+ prepend_input_copy(ctx, func, lhs, modifiers, semantic, semantic_index, force_align); - } - } - -@@ -486,11 +502,12 @@ static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function - return; - list_add_head(&func->body.instrs, &load->node.entry); - -- prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ prepend_input_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); - } - --static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, -- uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -+ struct hlsl_ir_load *rhs, uint32_t modifiers, -+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct hlsl_type *type = rhs->node.data_type, *vector_type; - struct vkd3d_shader_location *loc = &rhs->node.loc; -@@ -511,6 +528,9 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec 
- - vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type)); - -+ if (hlsl_type_major_size(type) > 1) -+ force_align = true; -+ - for (i = 0; i < hlsl_type_major_size(type); ++i) - { - struct hlsl_ir_node *store; -@@ -518,7 +538,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec - struct hlsl_ir_load *load; - - if (!(output = add_semantic_var(ctx, func, var, vector_type, -- modifiers, semantic, semantic_index + i, true, loc))) -+ modifiers, semantic, semantic_index + i, true, force_align, loc))) - return; - - if (type->class == HLSL_CLASS_MATRIX) -@@ -546,8 +566,9 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_dec - } - } - --static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, -- struct hlsl_ir_load *rhs, uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index) -+static void append_output_copy_recurse(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers, -+ struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align) - { - struct vkd3d_shader_location *loc = &rhs->node.loc; - struct hlsl_type *type = rhs->node.data_type; -@@ -563,10 +584,14 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - - for (i = 0; i < hlsl_type_element_count(type); ++i) - { -+ uint32_t element_modifiers; -+ - if (type->class == HLSL_CLASS_ARRAY) - { - elem_semantic_index = semantic_index - + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4; -+ element_modifiers = modifiers; -+ force_align = true; - } - else - { -@@ -577,6 +602,8 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - semantic = &field->semantic; - elem_semantic_index = semantic->index; - loc = &field->loc; -+ element_modifiers = combine_field_storage_modifiers(modifiers, 
field->storage_modifiers); -+ force_align = (i == 0); - } - - if (!(c = hlsl_new_uint_constant(ctx, i, &var->loc))) -@@ -587,12 +614,13 @@ static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_func - return; - hlsl_block_add_instr(&func->body, &element_load->node); - -- append_output_copy_recurse(ctx, func, element_load, modifiers, semantic, elem_semantic_index); -+ append_output_copy_recurse(ctx, func, element_load, element_modifiers, -+ semantic, elem_semantic_index, force_align); - } - } - else - { -- append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index); -+ append_output_copy(ctx, func, rhs, modifiers, semantic, semantic_index, force_align); - } - } - -@@ -608,7 +636,7 @@ static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function - return; - hlsl_block_add_instr(&func->body, &load->node); - -- append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index); -+ append_output_copy_recurse(ctx, func, load, var->storage_modifiers, &var->semantic, var->semantic.index, false); - } - - bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), -@@ -4051,6 +4079,44 @@ static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - return true; - } - -+static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *cond, *cond_cast, *abs, *neg; -+ struct hlsl_type *float_type; -+ struct hlsl_ir_jump *jump; -+ struct hlsl_block block; -+ -+ if (instr->type != HLSL_IR_JUMP) -+ return false; -+ jump = hlsl_ir_jump(instr); -+ if (jump->type != HLSL_IR_JUMP_DISCARD_NZ) -+ return false; -+ -+ cond = jump->condition.node; -+ float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->dimx); -+ -+ hlsl_block_init(&block); -+ -+ if (!(cond_cast = hlsl_new_cast(ctx, cond, float_type, &instr->loc))) -+ return false; -+ 
hlsl_block_add_instr(&block, cond_cast); -+ -+ if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, cond_cast, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, abs); -+ -+ if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(&block, neg); -+ -+ list_move_tail(&instr->entry, &block.instrs); -+ hlsl_src_remove(&jump->condition); -+ hlsl_src_from_node(&jump->condition, neg); -+ jump->type = HLSL_IR_JUMP_DISCARD_NEG; -+ -+ return true; -+} -+ - static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - switch (instr->type) -@@ -4419,6 +4485,9 @@ struct register_allocator - uint32_t reg; - unsigned int writemask; - unsigned int first_write, last_read; -+ -+ /* Two allocations with different mode can't share the same register. */ -+ int mode; - } *allocations; - size_t count, capacity; - -@@ -4428,10 +4497,17 @@ struct register_allocator - - /* Total number of registers allocated so far. Used to declare sm4 temp count. */ - uint32_t reg_count; -+ -+ /* Special flag so allocations that can share registers prioritize those -+ * that will result in smaller writemasks. -+ * For instance, a single-register allocation would prefer to share a register -+ * whose .xy components are already allocated (becoming .z) instead of a -+ * register whose .xyz components are already allocated (becoming .w). 
*/ -+ bool prioritize_smaller_writemasks; - }; - - static unsigned int get_available_writemask(const struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_idx) -+ unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode) - { - unsigned int writemask = VKD3DSP_WRITEMASK_ALL; - size_t i; -@@ -4446,7 +4522,11 @@ static unsigned int get_available_writemask(const struct register_allocator *all - - if (allocation->reg == reg_idx - && first_write < allocation->last_read && last_read > allocation->first_write) -+ { - writemask &= ~allocation->writemask; -+ if (allocation->mode != mode) -+ writemask = 0; -+ } - - if (!writemask) - break; -@@ -4455,8 +4535,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all - return writemask; - } - --static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read) -+static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx, -+ unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode) - { - struct allocation *allocation; - -@@ -4469,6 +4549,7 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a - allocation->writemask = writemask; - allocation->first_write = first_write; - allocation->last_read = last_read; -+ allocation->mode = mode; - - allocator->reg_count = max(allocator->reg_count, reg_idx + 1); - } -@@ -4478,26 +4559,35 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a - * register, even if they don't use it completely. 
*/ - static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, unsigned int reg_size, -- unsigned int component_count) -+ unsigned int component_count, int mode, bool force_align) - { -+ unsigned int required_size = force_align ? 4 : reg_size; -+ unsigned int writemask = 0, pref; - struct hlsl_reg ret = {0}; -- unsigned int writemask; - uint32_t reg_idx; - - VKD3D_ASSERT(component_count <= reg_size); - -- for (reg_idx = 0;; ++reg_idx) -+ pref = allocator->prioritize_smaller_writemasks ? 4 : required_size; -+ for (; pref >= required_size; --pref) - { -- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx); -- -- if (vkd3d_popcount(writemask) >= reg_size) -+ for (reg_idx = 0; pref == required_size || reg_idx < allocator->reg_count; ++reg_idx) - { -- writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1); -- break; -+ unsigned int available_writemask = get_available_writemask(allocator, -+ first_write, last_read, reg_idx, mode); -+ -+ if (vkd3d_popcount(available_writemask) >= pref) -+ { -+ writemask = hlsl_combine_writemasks(available_writemask, (1u << reg_size) - 1); -+ break; -+ } - } -+ if (writemask) -+ break; - } - -- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read); -+ VKD3D_ASSERT(vkd3d_popcount(writemask) == reg_size); -+ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); - - ret.id = reg_idx; - ret.allocation_size = 1; -@@ -4508,7 +4598,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - - /* Allocate a register with writemask, while reserving reg_writemask. 
*/ - static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) -+ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode) - { - struct hlsl_reg ret = {0}; - uint32_t reg_idx; -@@ -4517,11 +4607,12 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - - for (reg_idx = 0;; ++reg_idx) - { -- if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) -+ if ((get_available_writemask(allocator, first_write, last_read, -+ reg_idx, mode) & reg_writemask) == reg_writemask) - break; - } - -- record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); -+ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode); - - ret.id = reg_idx; - ret.allocation_size = 1; -@@ -4530,8 +4621,8 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - return ret; - } - --static bool is_range_available(const struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) -+static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, -+ unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode) - { - unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1; - unsigned int writemask; -@@ -4539,18 +4630,18 @@ static bool is_range_available(const struct register_allocator *allocator, - - for (i = 0; i < (reg_size / 4); ++i) - { -- writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i); -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode); - if (writemask != VKD3DSP_WRITEMASK_ALL) - return false; - } -- writemask = get_available_writemask(allocator, 
first_write, last_read, reg_idx + (reg_size / 4)); -+ writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode); - if ((writemask & last_reg_mask) != last_reg_mask) - return false; - return true; - } - - static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, -- unsigned int first_write, unsigned int last_read, unsigned int reg_size) -+ unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode) - { - struct hlsl_reg ret = {0}; - uint32_t reg_idx; -@@ -4558,14 +4649,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo - - for (reg_idx = 0;; ++reg_idx) - { -- if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size)) -+ if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode)) - break; - } - - for (i = 0; i < reg_size / 4; ++i) -- record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read); -+ record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode); - if (reg_size % 4) -- record_allocation(ctx, allocator, reg_idx + (reg_size / 4), (1u << (reg_size % 4)) - 1, first_write, last_read); -+ record_allocation(ctx, allocator, reg_idx + (reg_size / 4), -+ (1u << (reg_size % 4)) - 1, first_write, last_read, mode); - - ret.id = reg_idx; - ret.allocation_size = align(reg_size, 4) / 4; -@@ -4581,9 +4673,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ - - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false); - else -- return allocate_range(ctx, allocator, first_write, last_read, reg_size); -+ return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0); - } - - static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type) -@@ -4762,7 +4854,7 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx, - - if (reg_writemask) - instr->reg = allocate_register_with_masks(ctx, allocator, -- instr->index, instr->last_read, reg_writemask, dst_writemask); -+ instr->index, instr->last_read, reg_writemask, dst_writemask, 0); - else - instr->reg = allocate_numeric_registers_for_type(ctx, allocator, - instr->index, instr->last_read, instr->data_type); -@@ -5083,14 +5175,14 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - { - if (i < bind_count) - { -- if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Overlapping register() reservations on 'c%u'.", reg_idx + i); - } -- record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); -+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); - } -- record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); -+ record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0); - } - - var->regs[HLSL_REGSET_NUMERIC].id = reg_idx; -@@ -5113,7 +5205,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - if 
(!var->regs[HLSL_REGSET_NUMERIC].allocated) - { -- var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size); -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0); - TRACE("Allocated %s to %s.\n", var->name, - debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type)); - } -@@ -5156,7 +5248,7 @@ uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_d - var = entry_func->parameters.vars[i]; - if (var->is_output_semantic) - { -- record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read); -+ record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0); - break; - } - } -@@ -5168,8 +5260,38 @@ uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_d - return allocator.reg_count; - } - -+enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type, unsigned int storage_modifiers) -+{ -+ unsigned int i; -+ -+ static const struct -+ { -+ unsigned int modifiers; -+ enum vkd3d_shader_interpolation_mode mode; -+ } -+ modes[] = -+ { -+ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID}, -+ {HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE}, -+ {HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID}, -+ {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID}, -+ }; -+ -+ if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) -+ || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT) -+ return VKD3DSIM_CONSTANT; -+ -+ for (i = 0; i < ARRAY_SIZE(modes); ++i) -+ { -+ if ((storage_modifiers & modes[i].modifiers) == modes[i].modifiers) -+ return modes[i].mode; -+ } -+ -+ return VKD3DSIM_LINEAR; -+} -+ - static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, -- unsigned int *counter, bool output, bool is_patch_constant_func) -+ struct 
register_allocator *allocator, bool output, bool optimize, bool is_patch_constant_func) - { - static const char *const shader_names[] = - { -@@ -5228,6 +5350,13 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx))) - reg = has_idx ? var->semantic.index : 0; -+ -+ if (semantic == VKD3D_SHADER_SV_TESS_FACTOR_TRIINT) -+ { -+ /* While SV_InsideTessFactor can be declared as 'float' for "tri" -+ * domains, it is allocated as if it was 'float[1]'. */ -+ var->force_align = true; -+ } - } - - if (builtin) -@@ -5237,28 +5366,39 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - } - else - { -- var->regs[HLSL_REGSET_NUMERIC].allocated = true; -- var->regs[HLSL_REGSET_NUMERIC].id = (*counter)++; -- var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1; -- var->regs[HLSL_REGSET_NUMERIC].writemask = (1 << var->data_type->dimx) - 1; -- TRACE("Allocated %s to %s.\n", var->name, debug_register(output ? 'o' : 'v', -- var->regs[HLSL_REGSET_NUMERIC], var->data_type)); -+ int mode = (ctx->profile->major_version < 4) -+ ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -+ unsigned int reg_size = optimize ? var->data_type->dimx : 4; -+ -+ var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, -+ UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align); -+ -+ TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 
'o' : 'v', -+ var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode); - } - } - - static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ struct register_allocator input_allocator = {0}, output_allocator = {0}; -+ bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX; -+ bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL; - bool is_patch_constant_func = entry_func == ctx->patch_constant_func; -- unsigned int input_counter = 0, output_counter = 0; - struct hlsl_ir_var *var; - -+ input_allocator.prioritize_smaller_writemasks = true; -+ output_allocator.prioritize_smaller_writemasks = true; -+ - LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry) - { - if (var->is_input_semantic) -- allocate_semantic_register(ctx, var, &input_counter, false, is_patch_constant_func); -+ allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader, is_patch_constant_func); - if (var->is_output_semantic) -- allocate_semantic_register(ctx, var, &output_counter, true, is_patch_constant_func); -+ allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader, is_patch_constant_func); - } -+ -+ vkd3d_free(input_allocator.allocations); -+ vkd3d_free(output_allocator.allocations); - } - - static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, -@@ -6282,7 +6422,7 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - } - - static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program, -- struct shader_signature *signature, bool output, struct hlsl_ir_var *var) -+ struct shader_signature *signature, bool output, bool is_patch_constant_func, struct hlsl_ir_var *var) - { - enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; - enum vkd3d_shader_component_type component_type; -@@ -6296,9 +6436,8 @@ static void generate_vsir_signature_entry(struct hlsl_ctx 
*ctx, struct vsir_prog - struct vkd3d_string_buffer *string; - bool has_idx, ret; - -- ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, -- ctx->semantic_compat_mapping, ctx->domain, var->semantic.name, var->semantic.index, -- output, signature == &program->patch_constant_signature); -+ ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping, -+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); - VKD3D_ASSERT(ret); - if (sysval == ~0u) - return; -@@ -6306,16 +6445,15 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx)) - { - register_index = has_idx ? var->semantic.index : ~0u; -+ mask = (1u << var->data_type->dimx) - 1; - } - else - { - VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ mask = var->regs[HLSL_REGSET_NUMERIC].writemask; - } - -- /* NOTE: remember to change this to the actually allocated mask once -- * we start optimizing interstage signatures. */ -- mask = (1u << var->data_type->dimx) - 1; - use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ - - switch (var->data_type->e.numeric.type) -@@ -6410,21 +6548,27 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - static void generate_vsir_signature(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_function_decl *func) - { -+ bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN; -+ bool is_patch_constant_func = func == ctx->patch_constant_func; - struct hlsl_ir_var *var; - - LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { -- if (func == ctx->patch_constant_func) -+ if (var->is_input_semantic) - { -- generate_vsir_signature_entry(ctx, program, -- &program->patch_constant_signature, var->is_output_semantic, var); -+ if (is_patch_constant_func) -+ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, true, var); -+ else if (is_domain) -+ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, false, var); -+ else -+ generate_vsir_signature_entry(ctx, program, &program->input_signature, false, false, var); - } -- else -+ if (var->is_output_semantic) - { -- if (var->is_input_semantic) -- generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var); -- if (var->is_output_semantic) -- generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var); -+ if (is_patch_constant_func) -+ generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, true, var); -+ else -+ generate_vsir_signature_entry(ctx, program, &program->output_signature, true, false, var); - } - } - } -@@ -7366,7 +7510,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) -+ if (!vsir_program_init(program, NULL, &version, 0, 
VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -@@ -7404,7 +7548,7 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; - -- if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, false)) -+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -@@ -7715,6 +7859,78 @@ static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru - return true; - } - -+static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *call, *rhs, *store; -+ struct hlsl_ir_function_decl *func; -+ unsigned int component_count; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_ir_var *lhs; -+ char *body; -+ -+ static const char template[] = -+ "typedef uint%u uintX;\n" -+ "uintX soft_f32tof16(float%u x)\n" -+ "{\n" -+ " uintX v = asuint(x);\n" -+ " uintX v_abs = v & 0x7fffffff;\n" -+ " uintX sign_bit = (v >> 16) & 0x8000;\n" -+ " uintX exp = (v >> 23) & 0xff;\n" -+ " uintX mantissa = v & 0x7fffff;\n" -+ " uintX nan16;\n" -+ " uintX nan = (v & 0x7f800000) == 0x7f800000;\n" -+ " uintX val;\n" -+ "\n" -+ " val = 113 - exp;\n" -+ " val = (mantissa + 0x800000) >> val;\n" -+ " val >>= 13;\n" -+ "\n" -+ " val = (exp - 127) < -38 ? 0 : val;\n" -+ "\n" -+ " val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n" -+ " val = v_abs > 0x47ffe000 ? 0x7bff : val;\n" -+ "\n" -+ " nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n" -+ " val = nan ? 
nan16 : val;\n" -+ "\n" -+ " return (val & 0x7fff) + sign_bit;\n" -+ "}\n"; -+ -+ if (node->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(node); -+ -+ if (expr->op != HLSL_OP1_F32TOF16) -+ return false; -+ -+ rhs = expr->operands[0].node; -+ component_count = hlsl_type_component_count(rhs->data_type); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) -+ return false; -+ -+ if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body))) -+ return false; -+ -+ lhs = func->parameters.vars[0]; -+ -+ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ if (!(call = hlsl_new_call(ctx, func, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, call); -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, &load->node); -+ -+ return true; -+} -+ - static void process_entry_function(struct hlsl_ctx *ctx, - const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func) - { -@@ -7743,7 +7959,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, - return; - - if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) -+ { - lower_ir(ctx, lower_f16tof32, body); -+ lower_ir(ctx, lower_f32tof16, body); -+ } - - lower_return(ctx, entry_func, body, false); - -@@ -7797,6 +8016,10 @@ static void process_entry_function(struct hlsl_ctx *ctx, - { - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } -+ else -+ { -+ hlsl_transform_ir(ctx, lower_discard_nz, body, NULL); -+ } - - transform_unroll_loops(ctx, body); - hlsl_run_const_passes(ctx, body); -@@ -7893,6 +8116,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads) - hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, - "Entry point \"%s\" is missing a 
[numthreads] attribute.", entry_func->func->name); -+ else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID) -+ hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, -+ "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name); - - hlsl_block_init(&global_uniform_block); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b47f12d2188..19dc36d9191 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -75,7 +75,7 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil - - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -- bool normalised_io) -+ enum vsir_normalisation_level normalisation_level) - { - memset(program, 0, sizeof(*program)); - -@@ -98,8 +98,7 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c - - program->shader_version = *version; - program->cf_type = cf_type; -- program->normalised_io = normalised_io; -- program->normalised_hull_cp_io = normalised_io; -+ program->normalisation_level = normalisation_level; - return shader_instruction_array_init(&program->instructions, reserve); - } - -@@ -265,6 +264,13 @@ static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigne - dst->reg.idx[0].offset = idx; - } - -+static void dst_param_init_temp_float4(struct vkd3d_shader_dst_param *dst, unsigned int idx) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ dst->reg.idx[0].offset = idx; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+} -+ - static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) - { - vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -@@ -693,6 +699,7 @@ static enum 
vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_CONSTANT_BUFFER: -+ case VKD3DSIH_DCL_GLOBAL_FLAGS: - case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: - case VKD3DSIH_DCL_THREAD_GROUP: -@@ -1135,11 +1142,11 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - enum vkd3d_result ret; - unsigned int i, j; - -- VKD3D_ASSERT(!program->normalised_hull_cp_io); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NOT_NORMALISED); - - if (program->shader_version.type != VKD3D_SHADER_TYPE_HULL) - { -- program->normalised_hull_cp_io = true; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return VKD3D_OK; - } - -@@ -1186,7 +1193,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - break; - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - program->instructions = normaliser.instructions; -- program->normalised_hull_cp_io = true; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return VKD3D_OK; - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -@@ -1195,7 +1202,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - ret = control_point_normaliser_emit_hs_input(&normaliser, &program->input_signature, - input_control_point_count, i, &location); - program->instructions = normaliser.instructions; -- program->normalised_hull_cp_io = true; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return ret; - default: - break; -@@ -1203,7 +1210,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - } - - program->instructions = normaliser.instructions; -- program->normalised_hull_cp_io = true; -+ program->normalisation_level = VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - return VKD3D_OK; - } - -@@ -1917,7 +1924,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - struct 
vkd3d_shader_instruction *ins; - unsigned int i; - -- VKD3D_ASSERT(!program->normalised_io); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_HULL_CONTROL_POINT_IO); - - normaliser.phase = VKD3DSIH_INVALID; - normaliser.shader_type = program->shader_version.type; -@@ -1975,7 +1982,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - - program->instructions = normaliser.instructions; - program->use_vocp = normaliser.use_vocp; -- program->normalised_io = true; -+ program->normalisation_level = VSIR_FULLY_NORMALISED_IO; - return VKD3D_OK; - } - -@@ -6133,6 +6140,192 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra - return VKD3D_OK; - } - -+static bool has_texcoord_signature_element(const struct shader_signature *signature) -+{ -+ for (size_t i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(signature->elements[i].semantic_name, "TEXCOORD")) -+ return true; -+ } -+ return false; -+} -+ -+/* Returns true if replacement was done. */ -+static bool replace_texcoord_with_point_coord(struct vsir_program *program, -+ struct vkd3d_shader_src_param *src, unsigned int coord_temp) -+{ -+ uint32_t prev_swizzle = src->swizzle; -+ const struct signature_element *e; -+ -+ /* The input semantic may have a nontrivial mask, which we need to -+ * correct for. E.g. if the mask is .yz, and we read from .y, that needs -+ * to become .x. */ -+ static const uint32_t inverse_swizzles[16] = -+ { -+ /* Use _ for "undefined" components, for clarity. 
*/ -+#define VKD3D_SHADER_SWIZZLE__ VKD3D_SHADER_SWIZZLE_X -+ 0, -+ /* .x */ VKD3D_SHADER_SWIZZLE(X, _, _, _), -+ /* .y */ VKD3D_SHADER_SWIZZLE(_, X, _, _), -+ /* .xy */ VKD3D_SHADER_SWIZZLE(X, Y, _, _), -+ /* .z */ VKD3D_SHADER_SWIZZLE(_, _, X, _), -+ /* .xz */ VKD3D_SHADER_SWIZZLE(X, _, Y, _), -+ /* .yz */ VKD3D_SHADER_SWIZZLE(_, X, Y, _), -+ /* .xyz */ VKD3D_SHADER_SWIZZLE(X, Y, Z, _), -+ /* .w */ VKD3D_SHADER_SWIZZLE(_, _, _, X), -+ /* .xw */ VKD3D_SHADER_SWIZZLE(X, _, _, Y), -+ /* .yw */ VKD3D_SHADER_SWIZZLE(_, X, _, Y), -+ /* .xyw */ VKD3D_SHADER_SWIZZLE(X, Y, _, Z), -+ /* .zw */ VKD3D_SHADER_SWIZZLE(_, _, X, Y), -+ /* .xzw */ VKD3D_SHADER_SWIZZLE(X, _, Y, Z), -+ /* .yzw */ VKD3D_SHADER_SWIZZLE(_, X, Y, Z), -+ /* .xyzw */ VKD3D_SHADER_SWIZZLE(X, Y, Z, W), -+#undef VKD3D_SHADER_SWIZZLE__ -+ }; -+ -+ if (src->reg.type != VKD3DSPR_INPUT) -+ return false; -+ e = &program->input_signature.elements[src->reg.idx[0].offset]; -+ -+ if (ascii_strcasecmp(e->semantic_name, "TEXCOORD")) -+ return false; -+ -+ src->reg.type = VKD3DSPR_TEMP; -+ src->reg.idx[0].offset = coord_temp; -+ -+ /* If the mask is already contiguous and zero-based, no need to remap -+ * the swizzle. 
*/ -+ if (!(e->mask & (e->mask + 1))) -+ return true; -+ -+ src->swizzle = 0; -+ for (unsigned int i = 0; i < 4; ++i) -+ { -+ src->swizzle |= vsir_swizzle_get_component(inverse_swizzles[e->mask], -+ vsir_swizzle_get_component(prev_swizzle, i)) << VKD3D_SHADER_SWIZZLE_SHIFT(i); -+ } -+ -+ return true; -+} -+ -+static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ const struct vkd3d_shader_parameter1 *sprite_parameter = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ struct vkd3d_shader_instruction *ins; -+ bool used_texcoord = false; -+ unsigned int coord_temp; -+ size_t i, insert_pos; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; -+ -+ for (i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_POINT_SPRITE) -+ sprite_parameter = parameter; -+ } -+ -+ if (!sprite_parameter) -+ return VKD3D_OK; -+ -+ if (sprite_parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported point sprite parameter type %#x.", sprite_parameter->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ if (sprite_parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid point sprite parameter data type %#x.", sprite_parameter->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ if (!sprite_parameter->u.immediate_constant.u.u32) -+ return VKD3D_OK; -+ -+ if (!has_texcoord_signature_element(&program->input_signature)) -+ return VKD3D_OK; -+ -+ /* VKD3DSPR_POINTCOORD is a two-component value; fill the remaining two -+ * components with zeroes. 
*/ -+ coord_temp = program->temp_count++; -+ -+ /* Construct the new temp after all LABEL, DCL, and NOP instructions. -+ * We need to skip NOP instructions because they might result from removed -+ * DCLs, and there could still be DCLs after NOPs. */ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) -+ break; -+ } -+ -+ insert_pos = i; -+ -+ /* Replace each texcoord read with a read from the point coord. */ -+ for (; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ for (unsigned int j = 0; j < ins->src_count; ++j) -+ { -+ used_texcoord |= replace_texcoord_with_point_coord(program, &ins->src[j], coord_temp); -+ -+ for (unsigned int k = 0; k < ins->src[j].reg.idx_count; ++k) -+ { -+ if (ins->src[j].reg.idx[k].rel_addr) -+ used_texcoord |= replace_texcoord_with_point_coord(program, -+ ins->src[j].reg.idx[k].rel_addr, coord_temp); -+ } -+ } -+ -+ for (unsigned int j = 0; j < ins->dst_count; ++j) -+ { -+ for (unsigned int k = 0; k < ins->dst[j].reg.idx_count; ++k) -+ { -+ if (ins->dst[j].reg.idx[k].rel_addr) -+ used_texcoord |= replace_texcoord_with_point_coord(program, -+ ins->dst[j].reg.idx[k].rel_addr, coord_temp); -+ } -+ } -+ } -+ -+ if (used_texcoord) -+ { -+ if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[insert_pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_temp_float4(&ins->dst[0], coord_temp); -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].swizzle = 
VKD3D_SHADER_NO_SWIZZLE; -+ ++ins; -+ -+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_temp_float4(&ins->dst[0], coord_temp); -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_2 | VKD3DSP_WRITEMASK_3; -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ++ins; -+ -+ program->has_point_coord = true; -+ } -+ -+ return VKD3D_OK; -+} -+ - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -6234,15 +6427,11 @@ static void vsir_validate_io_register(struct validation_context *ctx, - switch (ctx->program->shader_version.type) - { - case VKD3D_SHADER_TYPE_HULL: -- if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ if (ctx->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE -+ || ctx->program->normalisation_level >= VSIR_FULLY_NORMALISED_IO) - { - signature = &ctx->program->output_signature; -- has_control_point = ctx->program->normalised_hull_cp_io; -- } -- else if (ctx->program->normalised_io) -- { -- signature = &ctx->program->output_signature; -- has_control_point = true; -+ has_control_point = ctx->program->normalisation_level >= VSIR_NORMALISED_HULL_CONTROL_POINT_IO; - } - else - { -@@ -6274,7 +6463,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, - vkd3d_unreachable(); - } - -- if (!ctx->program->normalised_io) -+ if (ctx->program->normalisation_level < VSIR_FULLY_NORMALISED_IO) - { - /* Indices are [register] or [control point, register]. Both are - * allowed to have a relative address. 
*/ -@@ -7700,8 +7889,10 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - - switch (program->shader_version.type) - { -- case VKD3D_SHADER_TYPE_HULL: - case VKD3D_SHADER_TYPE_DOMAIN: -+ break; -+ -+ case VKD3D_SHADER_TYPE_HULL: - case VKD3D_SHADER_TYPE_GEOMETRY: - if (program->input_control_point_count == 0) - validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -@@ -7718,9 +7909,6 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c - switch (program->shader_version.type) - { - case VKD3D_SHADER_TYPE_HULL: -- if (program->output_control_point_count == 0) -- validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, -- "Invalid zero output control point count."); - break; - - default: -@@ -7844,6 +8032,7 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - vsir_transform(&ctx, vsir_program_insert_clip_planes); - vsir_transform(&ctx, vsir_program_insert_point_size); - vsir_transform(&ctx, vsir_program_insert_point_size_clamp); -+ vsir_transform(&ctx, vsir_program_insert_point_coord); - - if (TRACE_ON()) - vsir_program_trace(program); -diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c -index 5baefbc1f44..a0dbb06342d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/msl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/msl.c -@@ -785,6 +785,10 @@ static void msl_generator_generate(struct msl_generator *gen) - - vkd3d_string_buffer_printf(gen->buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); - -+ if (gen->program->global_flags) -+ msl_compiler_error(gen, VKD3D_SHADER_ERROR_MSL_INTERNAL, -+ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)gen->program->global_flags); -+ - vkd3d_string_buffer_printf(gen->buffer, "union vkd3d_vec4\n{\n"); - vkd3d_string_buffer_printf(gen->buffer, " uint4 u;\n"); - vkd3d_string_buffer_printf(gen->buffer, " int4 i;\n"); -@@ -869,8 +873,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, - if ((ret = vsir_program_transform(program, config_flags, compile_info, message_context)) < 0) - return ret; - -- VKD3D_ASSERT(program->normalised_io); -- VKD3D_ASSERT(program->normalised_hull_cp_io); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); - - if ((ret = msl_generator_init(&generator, program, compile_info, descriptor_info, message_context)) < 0) - return ret; -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 6a28e2cd68e..802fe221747 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -3252,6 +3252,9 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_WAVELANEINDEX: - snprintf(buffer, buffer_size, "vWaveLaneIndex"); - break; -+ case VKD3DSPR_POINT_COORD: -+ snprintf(buffer, buffer_size, "vPointCoord"); -+ break; - default: - FIXME("Unhandled register %#x.\n", reg->type); - snprintf(buffer, buffer_size, "unrecognized_%#x", reg->type); -@@ -4886,6 +4889,8 @@ vkd3d_register_builtins[] = - - {VKD3DSPR_TESSCOORD, {VKD3D_SHADER_COMPONENT_FLOAT, 3, SpvBuiltInTessCoord}}, - -+ {VKD3DSPR_POINT_COORD, {VKD3D_SHADER_COMPONENT_FLOAT, 2, SpvBuiltInPointCoord}}, -+ - {VKD3DSPR_COVERAGE, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - {VKD3DSPR_SAMPLEMASK, {VKD3D_SHADER_COMPONENT_UINT, 1, SpvBuiltInSampleMask, NULL, 1}}, - -@@ -5907,11 +5912,8 @@ static size_t 
spirv_compiler_get_current_function_location(struct spirv_compiler - return builder->main_function_location; - } - --static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler, -- const struct vkd3d_shader_instruction *instruction) -+static void spirv_compiler_emit_global_flags(struct spirv_compiler *compiler, enum vsir_global_flags flags) - { -- enum vkd3d_shader_global_flags flags = instruction->declaration.global_flags; -- - if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) - { - spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeEarlyFragmentTests, NULL, 0); -@@ -10180,9 +10182,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - - switch (instruction->opcode) - { -- case VKD3DSIH_DCL_GLOBAL_FLAGS: -- spirv_compiler_emit_dcl_global_flags(compiler, instruction); -- break; - case VKD3DSIH_DCL_INDEXABLE_TEMP: - spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); - break; -@@ -10596,6 +10595,14 @@ static void spirv_compiler_emit_io_declarations(struct spirv_compiler *compiler) - dst.reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; - spirv_compiler_emit_output_register(compiler, &dst); - } -+ -+ if (compiler->program->has_point_coord) -+ { -+ struct vkd3d_shader_dst_param dst; -+ -+ vsir_dst_param_init(&dst, VKD3DSPR_POINT_COORD, VKD3D_DATA_FLOAT, 0); -+ spirv_compiler_emit_input_register(compiler, &dst); -+ } - } - - static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *compiler) -@@ -10650,8 +10657,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - compile_info, compiler->message_context)) < 0) - return result; - -- VKD3D_ASSERT(program->normalised_io); -- VKD3D_ASSERT(program->normalised_hull_cp_io); -+ VKD3D_ASSERT(program->normalisation_level == VSIR_FULLY_NORMALISED_IO); - - max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); - if (!(compiler->output_info = 
vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -@@ -10663,6 +10669,7 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - spirv_compiler_allocate_ssa_register_ids(compiler, program->ssa_count); - if (compiler->shader_type == VKD3D_SHADER_TYPE_COMPUTE) - spirv_compiler_emit_thread_group_size(compiler, &program->thread_group_size); -+ spirv_compiler_emit_global_flags(compiler, program->global_flags); - - spirv_compiler_emit_descriptor_declarations(compiler); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 848e78a34d3..f96d300676c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -1212,9 +1212,10 @@ static void shader_sm4_read_dcl_indexable_temp(struct vkd3d_shader_instruction * - } - - static void shader_sm4_read_dcl_global_flags(struct vkd3d_shader_instruction *ins, uint32_t opcode, -- uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) -+ uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *sm4) - { - ins->declaration.global_flags = (opcode_token & VKD3D_SM4_GLOBAL_FLAGS_MASK) >> VKD3D_SM4_GLOBAL_FLAGS_SHIFT; -+ sm4->p.program->global_flags = ins->declaration.global_flags; - } - - static void shader_sm5_read_fcall(struct vkd3d_shader_instruction *ins, uint32_t opcode, uint32_t opcode_token, -@@ -2793,7 +2794,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vsir_program_init(program, compile_info, -- &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, false)) -+ &version, token_count / 7u + 20, VSIR_CF_STRUCTURED, VSIR_NOT_NORMALISED)) - return false; - vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); - sm4->ptr = sm4->start; -@@ -3017,6 +3018,9 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, - -+ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, -+ - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3DSPR_PRIMID, false}, - - {"sv_outputcontrolpointid", false, VKD3D_SHADER_TYPE_HULL, VKD3DSPR_OUTPOINTID, false}, -@@ -3115,6 +3119,12 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - -+ {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_NONE}, -+ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, -+ -+ {"sv_position", true, VKD3D_SHADER_TYPE_DOMAIN, VKD3D_SHADER_SV_POSITION}, -+ - {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, - {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, -@@ -3179,6 +3189,16 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - return false; - } - } -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ { -+ if (!output) -+ { -+ if (!ascii_strcasecmp(semantic_name, "sv_tessfactor")) -+ return 
get_tessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ if (!ascii_strcasecmp(semantic_name, "sv_insidetessfactor")) -+ return get_insidetessfactor_sysval_semantic(sysval_semantic, domain, semantic_idx); -+ } -+ } - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { -@@ -3213,18 +3233,37 @@ static void add_section(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - ctx->result = buffer->status; - } - -+static int signature_element_pointer_compare(const void *x, const void *y) -+{ -+ const struct signature_element *e = *(const struct signature_element **)x; -+ const struct signature_element *f = *(const struct signature_element **)y; -+ int ret; -+ -+ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) -+ return ret; -+ return vkd3d_u32_compare(e->mask, f->mask); -+} -+ - static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_signature *signature, uint32_t tag) - { -- bool output = tag == TAG_OSGN || tag == TAG_PCSG; -+ bool output = tag == TAG_OSGN || (tag == TAG_PCSG -+ && tpf->program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ const struct signature_element **sorted_elements; - struct vkd3d_bytecode_buffer buffer = {0}; - unsigned int i; - - put_u32(&buffer, signature->element_count); - put_u32(&buffer, 8); /* unknown */ - -+ if (!(sorted_elements = vkd3d_calloc(signature->element_count, sizeof(*sorted_elements)))) -+ return; -+ for (i = 0; i < signature->element_count; ++i) -+ sorted_elements[i] = &signature->elements[i]; -+ qsort(sorted_elements, signature->element_count, sizeof(*sorted_elements), signature_element_pointer_compare); -+ - for (i = 0; i < signature->element_count; ++i) - { -- const struct signature_element *element = &signature->elements[i]; -+ const struct signature_element *element = sorted_elements[i]; - enum vkd3d_shader_sysval_semantic sysval; - uint32_t used_mask = element->used_mask; - -@@ -3245,7 +3284,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const 
struct shader_si - - for (i = 0; i < signature->element_count; ++i) - { -- const struct signature_element *element = &signature->elements[i]; -+ const struct signature_element *element = sorted_elements[i]; - size_t string_offset; - - string_offset = put_string(&buffer, element->semantic_name); -@@ -3253,6 +3292,7 @@ static void tpf_write_signature(struct tpf_compiler *tpf, const struct shader_si - } - - add_section(tpf->ctx, &tpf->dxbc, tag, &buffer); -+ vkd3d_free(sorted_elements); - } - - static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) -@@ -3410,13 +3450,19 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) - - static enum vkd3d_sm4_data_type sm4_data_type(const struct hlsl_type *type) - { -- switch (type->e.resource.format->e.numeric.type) -+ const struct hlsl_type *format = type->e.resource.format; -+ -+ switch (format->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: - return VKD3D_SM4_DATA_DOUBLE; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: -+ if (format->modifiers & HLSL_MODIFIER_UNORM) -+ return VKD3D_SM4_DATA_UNORM; -+ if (format->modifiers & HLSL_MODIFIER_SNORM) -+ return VKD3D_SM4_DATA_SNORM; - return VKD3D_SM4_DATA_FLOAT; - - case HLSL_TYPE_INT: -@@ -4224,7 +4270,11 @@ static void sm4_register_from_deref(const struct tpf_compiler *tpf, struct vkd3d - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - - VKD3D_ASSERT(hlsl_reg.allocated); -- reg->type = VKD3DSPR_INPUT; -+ -+ if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ reg->type = VKD3DSPR_PATCHCONST; -+ else -+ reg->type = VKD3DSPR_INPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; - reg->idx_count = 1; -@@ -4818,7 +4868,13 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, - } - else - { -- instr.dsts[0].reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ if (output) -+ instr.dsts[0].reg.type = VKD3DSPR_OUTPUT; -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ instr.dsts[0].reg.type = VKD3DSPR_PATCHCONST; -+ else -+ instr.dsts[0].reg.type = VKD3DSPR_INPUT; -+ - instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - instr.dsts[0].reg.idx_count = 1; - instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; -@@ -4858,38 +4914,9 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, - - if (version->type == VKD3D_SHADER_TYPE_PIXEL) - { -- enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR; -- -- if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type)) -- { -- mode = VKD3DSIM_CONSTANT; -- } -- else -- { -- static const struct -- { -- unsigned int modifiers; -- enum vkd3d_shader_interpolation_mode mode; -- } -- modes[] = -- { -- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID }, -- { HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE }, -- { HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID }, -- { HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID }, -- }; -- unsigned int i; -- -- for (i = 0; i < ARRAY_SIZE(modes); ++i) -- { -- if ((var->storage_modifiers & modes[i].modifiers) == modes[i].modifiers) -- { -- mode = modes[i].mode; -- break; -- } -- } -- } -+ enum vkd3d_shader_interpolation_mode mode; - -+ mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); - instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; - } - } -@@ -5667,6 +5694,12 @@ static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_ - write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); - break; - -+ case HLSL_OP1_F32TOF16: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, 
&expr->node, arg1, 0); -+ break; -+ - case HLSL_OP1_FLOOR: - VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -@@ -6592,6 +6625,11 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec - tpf_write_dcl_tessellator_partitioning(tpf, ctx->partitioning); - tpf_write_dcl_tessellator_output_primitive(tpf, ctx->output_primitive); - } -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ { -+ tpf_write_dcl_input_control_point_count(tpf, 0); /* TODO: Obtain from OutputPatch */ -+ tpf_write_dcl_tessellator_domain(tpf, ctx->domain); -+ } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { -@@ -6717,6 +6755,7 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, - struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ enum vkd3d_shader_type shader_type = program->shader_version.type; - struct tpf_compiler tpf = {0}; - struct sm4_stat stat = {0}; - size_t i; -@@ -6731,7 +6770,7 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, - - tpf_write_signature(&tpf, &program->input_signature, TAG_ISGN); - tpf_write_signature(&tpf, &program->output_signature, TAG_OSGN); -- if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL) -+ if (shader_type == VKD3D_SHADER_TYPE_HULL || shader_type == VKD3D_SHADER_TYPE_DOMAIN) - tpf_write_signature(&tpf, &program->patch_constant_signature, TAG_PCSG); - write_sm4_rdef(ctx, &tpf.dxbc); - tpf_write_shdr(&tpf, entry_func); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 9df538a0da0..d6c68155ee7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -648,6 +648,7 @@ enum vkd3d_shader_register_type - VKD3DSPR_WAVELANECOUNT, - VKD3DSPR_WAVELANEINDEX, - 
VKD3DSPR_PARAMETER, -+ VKD3DSPR_POINT_COORD, - - VKD3DSPR_COUNT, - -@@ -773,7 +774,7 @@ enum vkd3d_shader_interpolation_mode - VKD3DSIM_COUNT = 8, - }; - --enum vkd3d_shader_global_flags -+enum vsir_global_flags - { - VKD3DSGF_REFACTORING_ALLOWED = 0x01, - VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x02, -@@ -1246,7 +1247,7 @@ struct vkd3d_shader_instruction - const struct vkd3d_shader_src_param *predicate; - union - { -- enum vkd3d_shader_global_flags global_flags; -+ enum vsir_global_flags global_flags; - struct vkd3d_shader_semantic semantic; - struct vkd3d_shader_register_semantic register_semantic; - struct vkd3d_shader_primitive_type primitive_type; -@@ -1393,6 +1394,13 @@ enum vsir_control_flow_type - VSIR_CF_BLOCKS, - }; - -+enum vsir_normalisation_level -+{ -+ VSIR_NOT_NORMALISED, -+ VSIR_NORMALISED_HULL_CONTROL_POINT_IO, -+ VSIR_FULLY_NORMALISED_IO, -+}; -+ - struct vsir_program - { - struct vkd3d_shader_version shader_version; -@@ -1412,11 +1420,12 @@ struct vsir_program - unsigned int block_count; - unsigned int temp_count; - unsigned int ssa_count; -+ enum vsir_global_flags global_flags; - bool use_vocp; - bool has_point_size; -+ bool has_point_coord; - enum vsir_control_flow_type cf_type; -- bool normalised_io; -- bool normalised_hull_cp_io; -+ enum vsir_normalisation_level normalisation_level; - - const char **block_names; - size_t block_name_count; -@@ -1430,7 +1439,7 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( - const struct vsir_program *program, enum vkd3d_shader_parameter_name name); - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type, -- bool normalised_io); -+ enum vsir_normalisation_level normalisation_level); - enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info 
*compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 5495809fcb9..ed4cc370639 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2005,6 +2005,8 @@ static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *li - - vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size, - state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views)); -+ memset(bindings->vk_uav_counter_views, 0, -+ state->uav_counters.binding_count * sizeof(*bindings->vk_uav_counter_views)); - bindings->uav_counters_dirty = true; - } - } -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 65339c7ba5d..fd0ca20838f 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1573,6 +1573,111 @@ static HRESULT vkd3d_check_device_extensions(struct d3d12_device *device, - return S_OK; - } - -+static void vkd3d_override_caps(struct d3d12_device *device) -+{ -+ const char *caps_override, *p; -+ -+ static const struct override_value -+ { -+ const char *str; -+ uint32_t value; -+ } -+ feature_level_override_values[] = -+ { -+ {"11.0", D3D_FEATURE_LEVEL_11_0}, -+ {"11.1", D3D_FEATURE_LEVEL_11_1}, -+ {"12.0", D3D_FEATURE_LEVEL_12_0}, -+ {"12.1", D3D_FEATURE_LEVEL_12_1}, -+ {"12.2", D3D_FEATURE_LEVEL_12_2}, -+ }, -+ resource_binding_tier_override_values[] = -+ { -+ {"1", D3D12_RESOURCE_BINDING_TIER_1}, -+ {"2", D3D12_RESOURCE_BINDING_TIER_2}, -+ {"3", D3D12_RESOURCE_BINDING_TIER_3}, -+ }; -+ static const struct override_field -+ { -+ const char *name; -+ size_t offset; -+ const struct override_value *values; -+ size_t value_count; -+ } -+ override_fields[] = -+ { -+ { -+ "feature_level", -+ offsetof(struct d3d12_device, 
vk_info.max_feature_level), -+ feature_level_override_values, -+ ARRAY_SIZE(feature_level_override_values) -+ }, -+ { -+ "resource_binding_tier", -+ offsetof(struct d3d12_device, feature_options.ResourceBindingTier), -+ resource_binding_tier_override_values, -+ ARRAY_SIZE(resource_binding_tier_override_values) -+ }, -+ }; -+ -+ if (!(caps_override = getenv("VKD3D_CAPS_OVERRIDE"))) -+ return; -+ -+ p = caps_override; -+ for (;;) -+ { -+ size_t i; -+ -+ for (i = 0; i < ARRAY_SIZE(override_fields); ++i) -+ { -+ const struct override_field *field = &override_fields[i]; -+ size_t len = strlen(field->name); -+ -+ if (strncmp(p, field->name, len) == 0 && p[len] == '=') -+ { -+ size_t j; -+ -+ p += len + 1; -+ -+ for (j = 0; j < field->value_count; ++j) -+ { -+ const struct override_value *value = &field->values[j]; -+ size_t value_len = strlen(value->str); -+ -+ if (strncmp(p, value->str, value_len) == 0 -+ && (p[value_len] == '\0' || p[value_len] == ',')) -+ { -+ memcpy(&((uint8_t *)device)[field->offset], (uint8_t *)&value->value, sizeof(value->value)); -+ -+ p += value_len; -+ if (p[0] == '\0') -+ { -+ TRACE("Overriding caps with: %s\n", caps_override); -+ return; -+ } -+ p += 1; -+ -+ break; -+ } -+ } -+ -+ if (j == field->value_count) -+ { -+ WARN("Cannot parse the override caps string: %s\n", caps_override); -+ return; -+ } -+ -+ break; -+ } -+ } -+ -+ if (i == ARRAY_SIZE(override_fields)) -+ { -+ WARN("Cannot parse the override caps string: %s\n", caps_override); -+ return; -+ } -+ } -+} -+ - static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - const struct vkd3d_device_create_info *create_info, - struct vkd3d_physical_device_info *physical_device_info, -@@ -1742,6 +1847,9 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vulkan_info->EXT_shader_viewport_index_layer; - - vkd3d_init_feature_level(vulkan_info, features, &device->feature_options); -+ -+ vkd3d_override_caps(device); -+ - if (vulkan_info->max_feature_level < 
create_info->minimum_feature_level) - { - WARN("Feature level %#x is not supported.\n", create_info->minimum_feature_level); -@@ -1810,6 +1918,26 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - && descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind - && descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind; - -+ /* Many Vulkan implementations allow up to 8 descriptor sets. Unfortunately -+ * using vkd3d with Vulkan heaps and push descriptors currently requires up -+ * to 9 descriptor sets (up to one for the push descriptors, up to one for -+ * the static samplers and seven for Vulkan heaps, one for each needed -+ * descriptor type). If we detect such situation, we disable push -+ * descriptors, which allows us to stay within the limits (not doing so is -+ * fatal on many implmentations). -+ * -+ * It is possible that a different strategy might be used. For example, we -+ * could move the static samplers to one of the seven Vulkan heaps sets. Or -+ * we could decide whether to create the push descriptor set when creating -+ * the root signature, depending on whether there are static samplers or -+ * not. 
*/ -+ if (device->vk_info.device_limits.maxBoundDescriptorSets == 8 && device->use_vk_heaps -+ && device->vk_info.KHR_push_descriptor) -+ { -+ TRACE("Disabling VK_KHR_push_descriptor to save a descriptor set.\n"); -+ device->vk_info.KHR_push_descriptor = VK_FALSE; -+ } -+ - if (device->use_vk_heaps) - vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->descriptor_indexing_properties); -@@ -1817,6 +1945,13 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); - -+ TRACE("Device %p: using %s descriptor heaps, with%s descriptor indexing, " -+ "with%s push descriptors, with%s mutable descriptors\n", -+ device, device->use_vk_heaps ? "Vulkan" : "virtual", -+ device->vk_info.EXT_descriptor_indexing ? "" : "out", -+ device->vk_info.KHR_push_descriptor ? "" : "out", -+ device->vk_info.EXT_mutable_descriptor_type ? "" : "out"); -+ - vkd3d_chain_physical_device_info_structures(physical_device_info, device); - - return S_OK; --- -2.45.2 - diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch deleted file mode 100644 index bfcbb63a..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch +++ /dev/null @@ -1,478 +0,0 @@ -From a1a3d168fcb8047c01bfe238ceec3e196fe7f077 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 24 Oct 2024 07:08:51 +1100 -Subject: [PATCH] Updated vkd3d to 91701f83035c0d67d1ab917e0f6b73f91e8583d4. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 12 -- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 15 --- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 12 -- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 105 +++++++++--------- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 6 - - libs/vkd3d/libs/vkd3d/device.c | 65 ++++++----- - libs/vkd3d/libs/vkd3d/resource.c | 14 ++- - libs/vkd3d/libs/vkd3d/state.c | 21 ++-- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 23 ++-- - 9 files changed, 123 insertions(+), 150 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index bbebf86e6d5..9e2eacbcfa6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1391,18 +1391,6 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - - if (ret < 0) - { -- WARN("Failed to parse shader.\n"); -- vsir_program_cleanup(program); -- return ret; -- } -- -- if ((ret = vkd3d_shader_parser_validate(&sm1.p, config_flags)) < 0) -- { -- WARN("Failed to validate shader after parsing, ret %d.\n", ret); -- -- if (TRACE_ON()) -- vsir_program_trace(program); -- - vsir_program_cleanup(program); - return ret; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 570af5eca5a..3235a278769 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -10603,22 +10603,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - vkd3d_free(byte_code); - - if (ret < 0) -- { -- WARN("Failed to parse shader.\n"); -- return ret; -- } -- -- if ((ret = vkd3d_shader_parser_validate(&sm6.p, config_flags)) < 0) -- { -- WARN("Failed to validate shader after parsing, ret %d.\n", ret); -- -- if (TRACE_ON()) -- vsir_program_trace(program); -- -- sm6_parser_cleanup(&sm6); -- vsir_program_cleanup(program); - return ret; -- } - - sm6_parser_cleanup(&sm6); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c 
b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index f96d300676c..c937b245559 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2964,22 +2964,10 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - - if (sm4.p.failed) - { -- WARN("Failed to parse shader.\n"); - vsir_program_cleanup(program); - return VKD3D_ERROR_INVALID_SHADER; - } - -- if ((ret = vkd3d_shader_parser_validate(&sm4.p, config_flags)) < 0) -- { -- WARN("Failed to validate shader after parsing, ret %d.\n", ret); -- -- if (TRACE_ON()) -- vsir_program_trace(program); -- -- vsir_program_cleanup(program); -- return ret; -- } -- - return VKD3D_OK; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index ca012d4948a..3355e18b88e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -489,13 +489,13 @@ static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, - if ((f = fopen(filename, "wb"))) - { - if (fwrite(data, 1, size, f) != size) -- ERR("Failed to write shader to %s.\n", filename); -+ WARN("Failed to write shader to %s.\n", filename); - if (fclose(f)) -- ERR("Failed to close stream %s.\n", filename); -+ WARN("Failed to close stream %s.\n", filename); - } - else - { -- ERR("Failed to open %s for dumping shader.\n", filename); -+ WARN("Failed to open %s for dumping shader.\n", filename); - } - } - -@@ -680,6 +680,50 @@ static int vkd3d_shader_validate_compile_info(const struct vkd3d_shader_compile_ - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t config_flags, -+ struct vkd3d_shader_message_context *message_context, struct vsir_program *program) -+{ -+ enum vkd3d_result ret; -+ -+ switch (compile_info->source_type) -+ { -+ case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -+ ret = d3dbc_parse(compile_info, 
config_flags, message_context, program); -+ break; -+ -+ case VKD3D_SHADER_SOURCE_DXBC_TPF: -+ ret = tpf_parse(compile_info, config_flags, message_context, program); -+ break; -+ -+ case VKD3D_SHADER_SOURCE_DXBC_DXIL: -+ ret = dxil_parse(compile_info, config_flags, message_context, program); -+ break; -+ -+ default: -+ ERR("Unsupported source type %#x.\n", compile_info->source_type); -+ ret = VKD3D_ERROR_INVALID_ARGUMENT; -+ break; -+ } -+ -+ if (ret < 0) -+ { -+ WARN("Failed to parse shader.\n"); -+ return ret; -+ } -+ -+ if ((ret = vsir_program_validate(program, config_flags, compile_info->source_name, message_context)) < 0) -+ { -+ WARN("Failed to validate shader after parsing, ret %d.\n", ret); -+ -+ if (TRACE_ON()) -+ vsir_program_trace(program); -+ -+ vsir_program_cleanup(program); -+ } -+ -+ return ret; -+} -+ - void vkd3d_shader_free_messages(char *messages) - { - TRACE("messages %p.\n", messages); -@@ -1578,31 +1622,7 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char - uint64_t config_flags = vkd3d_shader_init_config_flags(); - struct vsir_program program; - -- switch (compile_info->source_type) -- { -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = tpf_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = dxil_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- default: -- ERR("Unsupported source type %#x.\n", compile_info->source_type); -- ret = VKD3D_ERROR_INVALID_ARGUMENT; -- break; -- } -- -- if (ret < 0) -- { -- WARN("Failed to parse shader.\n"); -- } -- else -+ if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) - { - ret = vsir_program_scan(&program, compile_info, &message_context, NULL); - vsir_program_cleanup(&program); -@@ -1719,38 +1739,15 @@ int 
vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, - uint64_t config_flags = vkd3d_shader_init_config_flags(); - struct vsir_program program; - -- switch (compile_info->source_type) -- { -- case VKD3D_SHADER_SOURCE_D3D_BYTECODE: -- ret = d3dbc_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_TPF: -- ret = tpf_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- case VKD3D_SHADER_SOURCE_DXBC_DXIL: -- ret = dxil_parse(compile_info, config_flags, &message_context, &program); -- break; -- -- default: -- ERR("Unsupported source type %#x.\n", compile_info->source_type); -- ret = VKD3D_ERROR_INVALID_ARGUMENT; -- break; -- } -- -- if (ret < 0) -- { -- WARN("Failed to parse shader.\n"); -- } -- else -+ if (!(ret = vsir_parse(compile_info, config_flags, &message_context, &program))) - { - ret = vsir_program_compile(&program, config_flags, compile_info, out, &message_context); - vsir_program_cleanup(&program); - } - } - -- vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); -+ if (ret >= 0) -+ vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); - - vkd3d_shader_message_context_trace_messages(&message_context); - if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index d6c68155ee7..5ae938e0525 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1477,12 +1477,6 @@ void vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vsir_pr - void vkd3d_shader_parser_warning(struct vkd3d_shader_parser *parser, - enum vkd3d_shader_error error, const char *format, ...) 
VKD3D_PRINTF_FUNC(3, 4); - --static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser, uint64_t config_flags) --{ -- return vsir_program_validate(parser->program, config_flags, -- parser->location.source_name, parser->message_context); --} -- - struct vkd3d_shader_descriptor_info1 - { - enum vkd3d_shader_descriptor_type type; -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index fd0ca20838f..54a39e18a0f 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -136,7 +136,8 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic - VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - }; - -- if (device->vk_info.EXT_mutable_descriptor_type && index && index != VKD3D_SET_INDEX_UAV_COUNTER -+ if (device->vk_info.EXT_mutable_descriptor_type -+ && index != VKD3D_SET_INDEX_MUTABLE && index != VKD3D_SET_INDEX_UAV_COUNTER - && device->vk_descriptor_heap_layouts[index].applicable_heap_type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) - { - device->vk_descriptor_heap_layouts[index].vk_set_layout = VK_NULL_HANDLE; -@@ -144,7 +145,7 @@ static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *devic - } - - binding.binding = 0; -- binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && !index) -+ binding.descriptorType = (device->vk_info.EXT_mutable_descriptor_type && index == VKD3D_SET_INDEX_MUTABLE) - ? 
VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[index].type; - binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; - binding.stageFlags = VK_SHADER_STAGE_ALL; -@@ -200,14 +201,20 @@ static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device - { - static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = - { -- {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -- {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, -- /* UAV counters */ -- {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_UNIFORM_BUFFER] = -+ {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER] = -+ {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_SAMPLED_IMAGE] = -+ {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER] = -+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_STORAGE_IMAGE] = -+ {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, -+ [VKD3D_SET_INDEX_SAMPLER] = -+ {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, -+ [VKD3D_SET_INDEX_UAV_COUNTER] = -+ {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, - }; - const struct vkd3d_device_descriptor_limits *limits = 
&device->vk_info.descriptor_limits; - enum vkd3d_vk_descriptor_set_index set; -@@ -1918,24 +1925,26 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - && descriptor_indexing->descriptorBindingUniformTexelBufferUpdateAfterBind - && descriptor_indexing->descriptorBindingStorageTexelBufferUpdateAfterBind; - -- /* Many Vulkan implementations allow up to 8 descriptor sets. Unfortunately -- * using vkd3d with Vulkan heaps and push descriptors currently requires up -- * to 9 descriptor sets (up to one for the push descriptors, up to one for -- * the static samplers and seven for Vulkan heaps, one for each needed -- * descriptor type). If we detect such situation, we disable push -- * descriptors, which allows us to stay within the limits (not doing so is -- * fatal on many implmentations). -- * -- * It is possible that a different strategy might be used. For example, we -- * could move the static samplers to one of the seven Vulkan heaps sets. Or -- * we could decide whether to create the push descriptor set when creating -- * the root signature, depending on whether there are static samplers or -- * not. */ -- if (device->vk_info.device_limits.maxBoundDescriptorSets == 8 && device->use_vk_heaps -- && device->vk_info.KHR_push_descriptor) -- { -- TRACE("Disabling VK_KHR_push_descriptor to save a descriptor set.\n"); -- device->vk_info.KHR_push_descriptor = VK_FALSE; -+ if (device->use_vk_heaps && device->vk_info.KHR_push_descriptor) -+ { -+ /* VKD3D_SET_INDEX_COUNT for the Vulkan heaps, one for the push -+ * descriptors set and one for the static samplers set. */ -+ unsigned int descriptor_set_count = VKD3D_SET_INDEX_COUNT + 2; -+ -+ /* A mutable descriptor set can replace all those that should otherwise -+ * back the SRV-UAV-CBV descriptor heap. 
*/ -+ if (device->vk_info.EXT_mutable_descriptor_type) -+ descriptor_set_count -= VKD3D_SET_INDEX_COUNT - (VKD3D_SET_INDEX_MUTABLE + 1); -+ -+ /* For many Vulkan implementations maxBoundDescriptorSets == 8; also, -+ * if mutable descriptors are not available the descriptor set count -+ * will be 9; so saving a descriptor set is going to be often -+ * significant. */ -+ if (descriptor_set_count > device->vk_info.device_limits.maxBoundDescriptorSets) -+ { -+ WARN("Disabling VK_KHR_push_descriptor to save a descriptor set.\n"); -+ device->vk_info.KHR_push_descriptor = VK_FALSE; -+ } - } - - if (device->use_vk_heaps) -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index 6d6820d3752..1f7d90eb95f 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -2498,7 +2498,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea - enum vkd3d_vk_descriptor_set_index set, end; - unsigned int i = writes->count; - -- end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_UNIFORM_BUFFER -+ end = device->vk_info.EXT_mutable_descriptor_type ? VKD3D_SET_INDEX_MUTABLE - : VKD3D_SET_INDEX_STORAGE_IMAGE; - /* Binding a shader with the wrong null descriptor type works in Windows. - * To support that here we must write one to all applicable Vulkan sets. */ -@@ -4250,7 +4250,8 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descrip - if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type - && device->vk_descriptor_heap_layouts[set].vk_set_layout) - { -- pool_sizes[pool_desc.poolSizeCount].type = (device->vk_info.EXT_mutable_descriptor_type && !set) -+ pool_sizes[pool_desc.poolSizeCount].type = -+ (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE) - ? 
VK_DESCRIPTOR_TYPE_MUTABLE_EXT : device->vk_descriptor_heap_layouts[set].type; - pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; - } -@@ -4280,11 +4281,12 @@ static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descript - - if (!device->vk_descriptor_heap_layouts[set].vk_set_layout) - { -- /* Set 0 uses mutable descriptors, and this set is unused. */ -- if (!descriptor_heap->vk_descriptor_sets[0].vk_set -- && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, 0))) -+ /* Mutable descriptors are in use, and this set is unused. */ -+ if (!descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set -+ && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, -+ device, VKD3D_SET_INDEX_MUTABLE))) - return hr; -- descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[0].vk_set; -+ descriptor_set->vk_set = descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_MUTABLE].vk_set; - descriptor_set->vk_type = device->vk_descriptor_heap_layouts[set].type; - return S_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 8e5ec70a577..e7476a01bd7 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -1016,7 +1016,7 @@ static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d - } - else - { -- binding->set = 0; -+ binding->set = VKD3D_SET_INDEX_MUTABLE; - descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; - } - } -@@ -1483,21 +1483,24 @@ static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struc - { - const struct d3d12_device *device = root_signature->device; - enum vkd3d_vk_descriptor_set_index set; -+ VkDescriptorSetLayout vk_set_layout; - unsigned int i; - - for (i = 0; i < root_signature->vk_set_count; ++i) - vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout; - -- if (device->use_vk_heaps) -+ if 
(!device->use_vk_heaps) -+ return i; -+ -+ for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) - { -- VkDescriptorSetLayout mutable_layout = device->vk_descriptor_heap_layouts[0].vk_set_layout; -+ vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout; - -- for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) -- { -- VkDescriptorSetLayout vk_set_layout = device->vk_descriptor_heap_layouts[set].vk_set_layout; -- /* All layouts must be valid, so if null, just set it to the mutable one. */ -- vk_set_layouts[i++] = vk_set_layout ? vk_set_layout : mutable_layout; -- } -+ VKD3D_ASSERT(vk_set_layout); -+ vk_set_layouts[i++] = vk_set_layout; -+ -+ if (device->vk_info.EXT_mutable_descriptor_type && set == VKD3D_SET_INDEX_MUTABLE) -+ break; - } - - return i; -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index e6d477a5c12..97a99782d6a 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -772,14 +772,21 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev - - enum vkd3d_vk_descriptor_set_index - { -- VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, -- VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, -- VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, -- VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, -- VKD3D_SET_INDEX_STORAGE_IMAGE = 4, -- VKD3D_SET_INDEX_SAMPLER = 5, -- VKD3D_SET_INDEX_UAV_COUNTER = 6, -- VKD3D_SET_INDEX_COUNT = 7 -+ VKD3D_SET_INDEX_SAMPLER, -+ VKD3D_SET_INDEX_UAV_COUNTER, -+ VKD3D_SET_INDEX_MUTABLE, -+ -+ /* These are used when mutable descriptors are not available to back -+ * SRV-UAV-CBV descriptor heaps. They must stay at the end of this -+ * enumeration, so that they can be ignored when mutable descriptors are -+ * used. 
*/ -+ VKD3D_SET_INDEX_UNIFORM_BUFFER = VKD3D_SET_INDEX_MUTABLE, -+ VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, -+ VKD3D_SET_INDEX_SAMPLED_IMAGE, -+ VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, -+ VKD3D_SET_INDEX_STORAGE_IMAGE, -+ -+ VKD3D_SET_INDEX_COUNT - }; - - extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; --- -2.45.2 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch deleted file mode 100644 index 9272b8a6..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch +++ /dev/null @@ -1,735 +0,0 @@ -From 8b3980f9cb077628557f7b37e9303af0bcb672d6 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 25 Oct 2024 07:38:01 +1100 -Subject: [PATCH] Updated vkd3d to 5eff8bf9188c401cc31ce14d42798dc3751377bd. - ---- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 112 ++++++++++++++----- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 29 +++++ - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 24 +++- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 2 + - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 118 +++++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 79 ++++++------- - 6 files changed, 282 insertions(+), 82 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index a2a090e1c21..363054cb6d9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -20,8 +20,14 @@ - - struct glsl_resource_type_info - { -+ /* The number of coordinates needed to sample the resource type. */ - size_t coord_size; -+ /* Whether the resource type is an array type. */ -+ bool array; -+ /* Whether the resource type has a shadow/comparison variant. */ - bool shadow; -+ /* The type suffix for resource type. I.e., the "2D" part of "usampler2D" -+ * or "iimage2D". 
*/ - const char *type_suffix; - }; - -@@ -102,17 +108,17 @@ static const struct glsl_resource_type_info *shader_glsl_get_resource_type_info( - { - static const struct glsl_resource_type_info info[] = - { -- {0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ -- {1, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -- {1, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -- {2, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -- {2, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -- {3, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -- {3, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -- {2, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -- {3, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -- {3, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -- {4, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ -+ {0, 0, 0, "None"}, /* VKD3D_SHADER_RESOURCE_NONE */ -+ {1, 0, 0, "Buffer"}, /* VKD3D_SHADER_RESOURCE_BUFFER */ -+ {1, 0, 1, "1D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ -+ {2, 0, 1, "2D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ -+ {2, 0, 0, "2DMS"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ -+ {3, 0, 0, "3D"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ -+ {3, 0, 1, "Cube"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ -+ {2, 1, 1, "1DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ -+ {3, 1, 1, "2DArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ -+ {3, 1, 0, "2DMSArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ -+ {4, 1, 1, "CubeArray"}, /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ - }; - - if (!t || t >= ARRAY_SIZE(info)) -@@ -862,17 +868,24 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s - - static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -+ bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow; - const struct glsl_resource_type_info *resource_type_info; - unsigned int resource_id, 
resource_idx, resource_space; - unsigned int sampler_id, sampler_idx, sampler_space; - const struct vkd3d_shader_descriptor_info1 *d; - enum vkd3d_shader_component_type sampled_type; - enum vkd3d_shader_resource_type resource_type; -+ unsigned int component_idx, coord_size; - struct vkd3d_string_buffer *sample; - enum vkd3d_data_type data_type; -- unsigned int coord_size; - struct glsl_dst dst; -- bool shadow; -+ -+ bias = ins->opcode == VKD3DSIH_SAMPLE_B; -+ gather = ins->opcode == VKD3DSIH_GATHER4; -+ grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; -+ lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; -+ lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; -+ shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; - - if (vkd3d_shader_instruction_has_texel_offset(ins)) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -@@ -904,12 +917,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) - { - coord_size = resource_type_info->coord_size; -+ array = resource_type_info->array; - } - else - { - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled resource type %#x.", resource_type); - coord_size = 2; -+ array = false; - } - - sampler_id = ins->src[2].reg.idx[0].offset; -@@ -917,17 +932,17 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) - { - sampler_space = d->register_space; -- shadow = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; -+ shadow_sampler = d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_SAMPLER_COMPARISON_MODE; - -- if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ if (shadow) - { -- if (!shadow) -+ if (!shadow_sampler) - 
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Sampler %u is not a comparison sampler.", sampler_id); - } - else - { -- if (shadow) -+ if (shadow_sampler) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Sampler %u is a comparison sampler.", sampler_id); - } -@@ -942,26 +957,44 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - glsl_dst_init(&dst, gen, ins, &ins->dst[0]); - sample = vkd3d_string_buffer_get(&gen->string_buffers); - -- if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ if (gather) -+ vkd3d_string_buffer_printf(sample, "textureGather("); -+ else if (grad) -+ vkd3d_string_buffer_printf(sample, "textureGrad("); -+ else if (lod) - vkd3d_string_buffer_printf(sample, "textureLod("); - else - vkd3d_string_buffer_printf(sample, "texture("); - shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); - vkd3d_string_buffer_printf(sample, ", "); -- if (ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ if (shadow) - shader_glsl_print_shadow_coord(sample, gen, &ins->src[0], &ins->src[3], coord_size); - else - shader_glsl_print_src(sample, gen, &ins->src[0], - vkd3d_write_mask_from_component_count(coord_size), ins->src[0].reg.data_type); -- if (ins->opcode == VKD3DSIH_SAMPLE_B) -+ if (grad) - { - vkd3d_string_buffer_printf(sample, ", "); -- shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); -+ shader_glsl_print_src(sample, gen, &ins->src[3], -+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[3].reg.data_type); -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[4], -+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[4].reg.data_type); - } -- else if (ins->opcode == VKD3DSIH_SAMPLE_C_LZ) -+ else if (lod_zero) - { - 
vkd3d_string_buffer_printf(sample, ", 0.0"); - } -+ else if (bias || lod) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); -+ } -+ if (gather) -+ { -+ if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0))) -+ vkd3d_string_buffer_printf(sample, ", %d", component_idx); -+ } - vkd3d_string_buffer_printf(sample, ")"); - shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask); - -@@ -1465,6 +1498,15 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_FTOU: - shader_glsl_cast(gen, ins, "uint", "uvec"); - break; -+ case VKD3DSIH_GATHER4: -+ case VKD3DSIH_SAMPLE: -+ case VKD3DSIH_SAMPLE_B: -+ case VKD3DSIH_SAMPLE_C: -+ case VKD3DSIH_SAMPLE_C_LZ: -+ case VKD3DSIH_SAMPLE_GRAD: -+ case VKD3DSIH_SAMPLE_LOD: -+ shader_glsl_sample(gen, ins); -+ break; - case VKD3DSIH_GEO: - case VKD3DSIH_IGE: - shader_glsl_relop(gen, ins, ">=", "greaterThanEqual"); -@@ -1482,9 +1524,11 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - break; - case VKD3DSIH_IMAX: - case VKD3DSIH_MAX: -+ case VKD3DSIH_UMAX: - shader_glsl_intrinsic(gen, ins, "max"); - break; - case VKD3DSIH_MIN: -+ case VKD3DSIH_UMIN: - shader_glsl_intrinsic(gen, ins, "min"); - break; - case VKD3DSIH_IMUL: -@@ -1553,12 +1597,6 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - case VKD3DSIH_RSQ: - shader_glsl_intrinsic(gen, ins, "inversesqrt"); - break; -- case VKD3DSIH_SAMPLE: -- case VKD3DSIH_SAMPLE_B: -- case VKD3DSIH_SAMPLE_C: -- case VKD3DSIH_SAMPLE_C_LZ: -- shader_glsl_sample(gen, ins); -- break; - case VKD3DSIH_SQRT: - shader_glsl_intrinsic(gen, ins, "sqrt"); - break; -@@ -2197,6 +2235,20 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator - } - } - -+static void shader_glsl_handle_global_flags(struct vkd3d_string_buffer *buffer, -+ struct 
vkd3d_glsl_generator *gen, enum vsir_global_flags flags) -+{ -+ if (flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL) -+ { -+ vkd3d_string_buffer_printf(buffer, "layout(early_fragment_tests) in;\n"); -+ flags &= ~VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; -+ } -+ -+ if (flags) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)flags); -+} -+ - static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) - { - const struct vsir_program *program = gen->program; -@@ -2210,9 +2262,7 @@ static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) - group_size->x, group_size->y, group_size->z); - } - -- if (program->global_flags) -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled global flags %#"PRIx64".", (uint64_t)program->global_flags); -+ shader_glsl_handle_global_flags(buffer, gen, program->global_flags); - - shader_glsl_generate_descriptor_declarations(gen); - shader_glsl_generate_input_declarations(gen); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index c7aa148ea11..cafff2fa878 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -1695,6 +1695,22 @@ struct hlsl_ir_node *hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return &s->node; - } - -+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, -+ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_vsir_instruction_ref *vsir_instr; -+ -+ if (!(vsir_instr = hlsl_alloc(ctx, sizeof(*vsir_instr)))) -+ return NULL; -+ init_node(&vsir_instr->node, HLSL_IR_VSIR_INSTRUCTION_REF, type, loc); -+ vsir_instr->vsir_instr_idx = vsir_instr_idx; -+ -+ if (reg) -+ vsir_instr->node.reg = *reg; -+ -+ return &vsir_instr->node; -+} -+ - struct 
hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc) - { -@@ -2517,6 +2533,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - - case HLSL_IR_STATEBLOCK_CONSTANT: - return clone_stateblock_constant(ctx, map, hlsl_ir_stateblock_constant(instr)); -+ -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_unreachable(); - } - - vkd3d_unreachable(); -@@ -2938,6 +2957,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE", - [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE", - [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", -+ [HLSL_IR_VSIR_INSTRUCTION_REF] = "HLSL_IR_VSIR_INSTRUCTION_REF", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -3531,6 +3551,11 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - case HLSL_IR_STATEBLOCK_CONSTANT: - dump_ir_stateblock_constant(buffer, hlsl_ir_stateblock_constant(instr)); - break; -+ -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_string_buffer_printf(buffer, "vsir_program instruction %u", -+ hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx); -+ break; - } - } - -@@ -3839,6 +3864,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - case HLSL_IR_STATEBLOCK_CONSTANT: - free_ir_stateblock_constant(hlsl_ir_stateblock_constant(node)); - break; -+ -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_free(hlsl_ir_vsir_instruction_ref(node)); -+ break; - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index e234cd0ba40..ae7f8c1c04f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -328,6 +328,8 @@ enum hlsl_ir_node_type - HLSL_IR_COMPILE, - HLSL_IR_SAMPLER_STATE, - HLSL_IR_STATEBLOCK_CONSTANT, -+ -+ HLSL_IR_VSIR_INSTRUCTION_REF, - }; - - /* Common data for every type of IR instruction node. 
*/ -@@ -930,6 +932,16 @@ struct hlsl_ir_stateblock_constant - char *name; - }; - -+/* A vkd3d_shader_instruction that can be inserted in a hlsl_block. -+ * Only used for the HLSL IR to vsir translation, might be removed once this translation is complete. */ -+struct hlsl_ir_vsir_instruction_ref -+{ -+ struct hlsl_ir_node node; -+ -+ /* Index to a vkd3d_shader_instruction within a vkd3d_shader_instruction_array in a vsir_program. */ -+ unsigned int vsir_instr_idx; -+}; -+ - struct hlsl_scope - { - /* Item entry for hlsl_ctx.scopes. */ -@@ -1245,6 +1257,12 @@ static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(co - return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); - } - -+static inline struct hlsl_ir_vsir_instruction_ref *hlsl_ir_vsir_instruction_ref(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_VSIR_INSTRUCTION_REF); -+ return CONTAINING_RECORD(node, struct hlsl_ir_vsir_instruction_ref, node); -+} -+ - static inline void hlsl_block_init(struct hlsl_block *block) - { - list_init(&block->instrs); -@@ -1433,9 +1451,6 @@ struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, - - void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body); - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); --uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); --void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); --void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out); - int hlsl_emit_effect_binary(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out); -@@ -1570,6 +1585,9 @@ struct hlsl_ir_switch_case *hlsl_new_switch_case(struct hlsl_ctx *ctx, unsigned - struct hlsl_ir_node 
*hlsl_new_switch(struct hlsl_ctx *ctx, struct hlsl_ir_node *selector, - struct list *cases, const struct vkd3d_shader_location *loc); - -+struct hlsl_ir_node *hlsl_new_vsir_instruction_ref(struct hlsl_ctx *ctx, unsigned int vsir_instr_idx, -+ struct hlsl_type *type, const struct hlsl_reg *reg, const struct vkd3d_shader_location *loc); -+ - void hlsl_error(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, - enum vkd3d_shader_error error, const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5); - void hlsl_fixme(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 49cff4c81b8..cd938fd5906 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -672,6 +672,8 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - break; -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vkd3d_unreachable(); - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 6e1b2b437b0..2cb56d6b493 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -4162,6 +4162,9 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_STATEBLOCK_CONSTANT: - /* Stateblock constants should not appear in the shader program. */ - vkd3d_unreachable(); -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. 
*/ -+ vkd3d_unreachable(); - } - - return false; -@@ -4193,7 +4196,7 @@ static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - return true; - } - --void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct hlsl_scope *scope; - struct hlsl_ir_var *var; -@@ -4301,6 +4304,9 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - case HLSL_IR_STATEBLOCK_CONSTANT: - /* Stateblock constants should not appear in the shader program. */ - vkd3d_unreachable(); -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ /* HLSL IR nodes are not translated to hlsl_ir_vsir_instruction_ref at this point. */ -+ vkd3d_unreachable(); - - case HLSL_IR_STORE: - { -@@ -4441,7 +4447,7 @@ static void init_var_liveness(struct hlsl_ir_var *var) - var->last_read = UINT_MAX; - } - --void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct hlsl_scope *scope; - struct hlsl_ir_var *var; -@@ -5222,7 +5228,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - * index to all (simultaneously live) variables or intermediate values. Agnostic - * as to how many registers are actually available for the current backend, and - * does not handle constants. 
*/ --uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) -+static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct register_allocator allocator = {0}; - struct hlsl_scope *scope; -@@ -7536,10 +7542,101 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - sm1_generate_vsir_block(ctx, &entry_func->body, program); - } - -+static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_block *block) -+{ -+ struct vkd3d_shader_location *loc; -+ struct hlsl_ir_node *vsir_instr; -+ -+ loc = &program->instructions.elements[program->instructions.count - 1].location; -+ -+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, program->instructions.count - 1, NULL, NULL, loc))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ hlsl_block_add_instr(block, vsir_instr); -+} -+ -+static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, -+ uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0))) -+ return; -+ -+ ins->declaration.count = temp_count; -+ -+ add_last_vsir_instr_to_block(ctx, program, block); -+} -+ -+static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_block *block, uint32_t idx, -+ uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc) -+{ -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0))) -+ return; -+ -+ ins->declaration.indexable_temp.register_idx = idx; -+ ins->declaration.indexable_temp.register_size = size; -+ ins->declaration.indexable_temp.alignment = 0; -+ 
ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT; -+ ins->declaration.indexable_temp.component_count = comp_count; -+ ins->declaration.indexable_temp.has_function_scope = false; -+ -+ add_last_vsir_instr_to_block(ctx, program, block); -+} -+ -+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) -+{ -+ struct hlsl_block block = {0}; -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ uint32_t temp_count; -+ -+ compute_liveness(ctx, func); -+ mark_indexable_vars(ctx, func); -+ temp_count = allocate_temp_registers(ctx, func); -+ if (ctx->result) -+ return; -+ program->temp_count = max(program->temp_count, temp_count); -+ -+ hlsl_block_init(&block); -+ -+ if (temp_count) -+ sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc); -+ -+ LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -+ { -+ LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) -+ continue; -+ if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -+ continue; -+ -+ if (var->indexable) -+ { -+ unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -+ unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -+ -+ sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc); -+ } -+ } -+ } -+ -+ list_move_head(&func->body.instrs, &block.instrs); -+ -+ hlsl_block_cleanup(&block); -+} -+ - /* OBJECTIVE: Translate all the information from ctx and entry_func to the - * vsir_program, so it can be used as input to tpf_compile() without relying - * on ctx and entry_func. 
*/ --static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, - uint64_t config_flags, struct vsir_program *program) - { - struct vkd3d_shader_version version = {0}; -@@ -7554,9 +7651,20 @@ static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - return; - } - -- generate_vsir_signature(ctx, program, entry_func); -+ generate_vsir_signature(ctx, program, func); - if (version.type == VKD3D_SHADER_TYPE_HULL) - generate_vsir_signature(ctx, program, ctx->patch_constant_func); -+ -+ if (version.type == VKD3D_SHADER_TYPE_COMPUTE) -+ { -+ program->thread_group_size.x = ctx->thread_count[0]; -+ program->thread_group_size.y = ctx->thread_count[1]; -+ program->thread_group_size.z = ctx->thread_count[2]; -+ } -+ -+ sm4_generate_vsir_add_function(ctx, func, config_flags, program); -+ if (version.type == VKD3D_SHADER_TYPE_HULL) -+ sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program); - } - - static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index c937b245559..2198b828b7c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -3002,9 +3002,10 @@ bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - } - register_table[] = - { -- {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, -- {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, -- {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, -+ {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADID, false}, -+ {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_THREADGROUPID, false}, -+ {"sv_groupindex", false, 
VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADINDEX, false}, -+ {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3DSPR_LOCALTHREADID, false}, - - {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_TESSCOORD, false}, - {"sv_primitiveid", false, VKD3D_SHADER_TYPE_DOMAIN, VKD3DSPR_PRIMID, false}, -@@ -3105,6 +3106,7 @@ bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *s - { - {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, -+ {"sv_groupindex", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, - - {"sv_domainlocation", false, VKD3D_SHADER_TYPE_DOMAIN, ~0u}, -@@ -4935,42 +4937,39 @@ static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_temps(const struct tpf_compiler *tpf, uint32_t temp_count) -+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_TEMPS, - -- .idx = {temp_count}, -+ .idx = {count}, - .idx_count = 1, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_indexable_temp(const struct tpf_compiler *tpf, uint32_t idx, -- uint32_t size, uint32_t comp_count) -+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) - { - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, - -- .idx = {idx, size, comp_count}, -+ .idx = {temp->register_idx, temp->register_size, temp->component_count}, - .idx_count = 3, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_dcl_thread_group(const struct tpf_compiler *tpf, const uint32_t thread_count[3]) -+static void tpf_dcl_thread_group(const struct tpf_compiler *tpf, const struct vsir_thread_group_size *group_size) - { - struct sm4_instruction 
instr = - { - .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP, - -- .idx[0] = thread_count[0], -- .idx[1] = thread_count[1], -- .idx[2] = thread_count[2], -+ .idx = {group_size->x, group_size->y, group_size->z}, - .idx_count = 3, - }; - -@@ -6449,9 +6448,28 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ - write_sm4_instruction(tpf, &instr); - } - -+static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) -+{ -+ switch (ins->opcode) -+ { -+ case VKD3DSIH_DCL_TEMPS: -+ tpf_dcl_temps(tpf, ins->declaration.count); -+ break; -+ -+ case VKD3DSIH_DCL_INDEXABLE_TEMP: -+ tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ break; -+ } -+} -+ - static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; -+ unsigned int vsir_instr_idx; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) - { -@@ -6517,6 +6535,11 @@ static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_bl - write_sm4_swizzle(tpf, hlsl_ir_swizzle(instr)); - break; - -+ case HLSL_IR_VSIR_INSTRUCTION_REF: -+ vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx; -+ tpf_handle_instruction(tpf, &tpf->program->instructions.elements[vsir_instr_idx]); -+ break; -+ - default: - hlsl_fixme(tpf->ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type)); - } -@@ -6526,15 +6549,7 @@ static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_bl - static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) - { - struct hlsl_ctx *ctx = tpf->ctx; -- const struct hlsl_scope *scope; - const struct hlsl_ir_var *var; -- uint32_t temp_count; -- -- compute_liveness(ctx, func); -- mark_indexable_vars(ctx, func); -- temp_count = allocate_temp_registers(ctx, func); -- if 
(ctx->result) -- return; - - LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -6544,29 +6559,7 @@ static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_f - } - - if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) -- write_sm4_dcl_thread_group(tpf, ctx->thread_count); -- -- if (temp_count) -- write_sm4_dcl_temps(tpf, temp_count); -- -- LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry) -- { -- LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry) -- { -- if (var->is_uniform || var->is_input_semantic || var->is_output_semantic) -- continue; -- if (!var->regs[HLSL_REGSET_NUMERIC].allocated) -- continue; -- -- if (var->indexable) -- { -- unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id; -- unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4; -- -- write_sm4_dcl_indexable_temp(tpf, id, size, 4); -- } -- } -- } -+ tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); - - write_sm4_block(tpf, &func->body); - --- -2.45.2 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch deleted file mode 100644 index 91c1d26b..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch +++ /dev/null @@ -1,971 +0,0 @@ -From b9b2a90fbbc410f7ceec9afe02e483de2465d608 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 30 Oct 2024 10:33:09 +1100 -Subject: [PATCH] Updated vkd3d to ad2208b726f825305f69d099790208e4e4f85e35. 
- ---- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 4 + - libs/vkd3d/libs/vkd3d-shader/fx.c | 322 ++++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 9 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 4 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 139 +++++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 30 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 14 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 45 +-- - libs/vkd3d/libs/vkd3d/device.c | 2 +- - 10 files changed, 442 insertions(+), 128 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index bc28aebed4d..7c5444f63a3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -1189,6 +1189,10 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - vkd3d_string_buffer_printf(buffer, "vWaveLaneIndex"); - break; - -+ case VKD3DSPR_PARAMETER: -+ vkd3d_string_buffer_printf(buffer, "parameter"); -+ break; -+ - case VKD3DSPR_POINT_COORD: - vkd3d_string_buffer_printf(buffer, "vPointCoord"); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index d901f08d50d..8954feb22b7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -499,7 +499,35 @@ enum fx_4_type_constants - FX_4_NUMERIC_COLUMN_MAJOR_MASK = 0x4000, - - /* Object types */ -- FX_4_OBJECT_TYPE_STRING = 1, -+ FX_4_OBJECT_TYPE_STRING = 0x1, -+ FX_4_OBJECT_TYPE_PIXEL_SHADER = 0x5, -+ FX_4_OBJECT_TYPE_VERTEX_SHADER = 0x6, -+ FX_4_OBJECT_TYPE_GEOMETRY_SHADER = 0x7, -+ FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO = 0x8, -+ -+ FX_4_OBJECT_TYPE_TEXTURE = 0x9, -+ FX_4_OBJECT_TYPE_TEXTURE_1D = 0xa, -+ FX_4_OBJECT_TYPE_TEXTURE_1DARRAY = 0xb, -+ FX_4_OBJECT_TYPE_TEXTURE_2D = 0xc, -+ FX_4_OBJECT_TYPE_TEXTURE_2DARRAY = 0xd, -+ FX_4_OBJECT_TYPE_TEXTURE_2DMS = 0xe, -+ FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf, -+ 
FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10, -+ FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11, -+ FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17, -+ -+ FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b, -+ FX_5_OBJECT_TYPE_COMPUTE_SHADER = 0x1c, -+ FX_5_OBJECT_TYPE_HULL_SHADER = 0x1d, -+ FX_5_OBJECT_TYPE_DOMAIN_SHADER = 0x1e, -+ -+ FX_5_OBJECT_TYPE_UAV_1D = 0x1f, -+ FX_5_OBJECT_TYPE_UAV_1DARRAY = 0x20, -+ FX_5_OBJECT_TYPE_UAV_2D = 0x21, -+ FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22, -+ FX_5_OBJECT_TYPE_UAV_3D = 0x23, -+ FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24, -+ FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28, - - /* Types */ - FX_4_TYPE_CLASS_NUMERIC = 1, -@@ -764,16 +792,16 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - static const uint32_t texture_type[] = - { -- [HLSL_SAMPLER_DIM_GENERIC] = 9, -- [HLSL_SAMPLER_DIM_1D] = 10, -- [HLSL_SAMPLER_DIM_1DARRAY] = 11, -- [HLSL_SAMPLER_DIM_2D] = 12, -- [HLSL_SAMPLER_DIM_2DARRAY] = 13, -- [HLSL_SAMPLER_DIM_2DMS] = 14, -- [HLSL_SAMPLER_DIM_2DMSARRAY] = 15, -- [HLSL_SAMPLER_DIM_3D] = 16, -- [HLSL_SAMPLER_DIM_CUBE] = 17, -- [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, -+ [HLSL_SAMPLER_DIM_GENERIC] = FX_4_OBJECT_TYPE_TEXTURE, -+ [HLSL_SAMPLER_DIM_1D] = FX_4_OBJECT_TYPE_TEXTURE_1D, -+ [HLSL_SAMPLER_DIM_1DARRAY] = FX_4_OBJECT_TYPE_TEXTURE_1DARRAY, -+ [HLSL_SAMPLER_DIM_2D] = FX_4_OBJECT_TYPE_TEXTURE_2D, -+ [HLSL_SAMPLER_DIM_2DARRAY] = FX_4_OBJECT_TYPE_TEXTURE_2DARRAY, -+ [HLSL_SAMPLER_DIM_2DMS] = FX_4_OBJECT_TYPE_TEXTURE_2DMS, -+ [HLSL_SAMPLER_DIM_2DMSARRAY] = FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY, -+ [HLSL_SAMPLER_DIM_3D] = FX_4_OBJECT_TYPE_TEXTURE_3D, -+ [HLSL_SAMPLER_DIM_CUBE] = FX_4_OBJECT_TYPE_TEXTURE_CUBE, -+ [HLSL_SAMPLER_DIM_CUBEARRAY] = FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY, - }; - - put_u32_unaligned(buffer, texture_type[element_type->sampler_dim]); -@@ -786,13 +814,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - static const uint32_t uav_type[] = - { -- [HLSL_SAMPLER_DIM_1D] = 31, -- 
[HLSL_SAMPLER_DIM_1DARRAY] = 32, -- [HLSL_SAMPLER_DIM_2D] = 33, -- [HLSL_SAMPLER_DIM_2DARRAY] = 34, -- [HLSL_SAMPLER_DIM_3D] = 35, -- [HLSL_SAMPLER_DIM_BUFFER] = 36, -- [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, -+ [HLSL_SAMPLER_DIM_1D] = FX_5_OBJECT_TYPE_UAV_1D, -+ [HLSL_SAMPLER_DIM_1DARRAY] = FX_5_OBJECT_TYPE_UAV_1DARRAY, -+ [HLSL_SAMPLER_DIM_2D] = FX_5_OBJECT_TYPE_UAV_2D, -+ [HLSL_SAMPLER_DIM_2DARRAY] = FX_5_OBJECT_TYPE_UAV_2DARRAY, -+ [HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D, -+ [HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER, -+ [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER, - }; - - put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); -@@ -807,11 +835,11 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - } - else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) - { -- put_u32_unaligned(buffer, 5); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_PIXEL_SHADER); - } - else if (element_type->class == HLSL_CLASS_VERTEX_SHADER) - { -- put_u32_unaligned(buffer, 6); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_VERTEX_SHADER); - } - else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) - { -@@ -836,15 +864,15 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - } - else if (element_type->class == HLSL_CLASS_COMPUTE_SHADER) - { -- put_u32_unaligned(buffer, 28); -+ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_COMPUTE_SHADER); - } - else if (element_type->class == HLSL_CLASS_HULL_SHADER) - { -- put_u32_unaligned(buffer, 29); -+ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_HULL_SHADER); - } - else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER) - { -- put_u32_unaligned(buffer, 30); -+ put_u32_unaligned(buffer, FX_5_OBJECT_TYPE_DOMAIN_SHADER); - } - else - { -@@ -1568,20 +1596,17 @@ static uint32_t write_fx_4_state_numeric_value(struct hlsl_ir_constant *value, s - - for (i = 0; i < count; ++i) - { -- if (hlsl_is_numeric_type(data_type)) -+ 
switch (data_type->e.numeric.type) - { -- switch (data_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- case HLSL_TYPE_BOOL: -- type = fx_4_numeric_base_types[data_type->e.numeric.type]; -- break; -- default: -- type = 0; -- hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); -- } -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ type = fx_4_numeric_base_types[data_type->e.numeric.type]; -+ break; -+ default: -+ type = 0; -+ hlsl_fixme(ctx, &ctx->location, "Unsupported numeric state value type %u.", data_type->e.numeric.type); - } - - put_u32_unaligned(buffer, type); -@@ -2922,19 +2947,28 @@ static int fx_2_parse(struct fx_parser *parser) - return -1; - } - --static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) -+static const void *fx_parser_get_unstructured_ptr(struct fx_parser *parser, uint32_t offset, size_t size) - { - const uint8_t *ptr = parser->unstructured.ptr; - -- memset(dst, 0, size); - if (offset >= parser->unstructured.size - || size > parser->unstructured.size - offset) - { - parser->failed = true; -- return; -+ return NULL; - } - -- ptr += offset; -+ return &ptr[offset]; -+} -+ -+static void fx_parser_read_unstructured(struct fx_parser *parser, void *dst, uint32_t offset, size_t size) -+{ -+ const uint8_t *ptr; -+ -+ memset(dst, 0, size); -+ if (!(ptr = fx_parser_get_unstructured_ptr(parser, offset, size))) -+ return; -+ - memcpy(dst, ptr, size); - } - -@@ -3164,6 +3198,188 @@ static void fx_parse_buffers(struct fx_parser *parser) - } - } - -+static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int object_type) -+{ -+ struct vkd3d_shader_compile_info info = { 0 }; -+ struct vkd3d_shader_code output; -+ uint32_t data_size, offset; -+ const void *data = NULL; -+ const char *p, *q, *end; -+ struct fx_5_shader -+ { -+ uint32_t 
offset; -+ uint32_t sodecl[4]; -+ uint32_t sodecl_count; -+ uint32_t rast_stream; -+ uint32_t iface_bindings_count; -+ uint32_t iface_bindings; -+ } shader5; -+ struct fx_4_gs_so -+ { -+ uint32_t offset; -+ uint32_t sodecl; -+ } gs_so; -+ int ret; -+ -+ static const struct vkd3d_shader_compile_option options[] = -+ { -+ {VKD3D_SHADER_COMPILE_OPTION_API_VERSION, VKD3D_SHADER_API_VERSION_1_13}, -+ }; -+ -+ switch (object_type) -+ { -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ offset = fx_parser_read_u32(parser); -+ break; -+ -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ fx_parser_read_u32s(parser, &gs_so, sizeof(gs_so)); -+ offset = gs_so.offset; -+ break; -+ -+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -+ fx_parser_read_u32s(parser, &shader5, sizeof(shader5)); -+ offset = shader5.offset; -+ break; -+ -+ default: -+ parser->failed = true; -+ return; -+ } -+ -+ fx_parser_read_unstructured(parser, &data_size, offset, sizeof(data_size)); -+ if (data_size) -+ data = fx_parser_get_unstructured_ptr(parser, offset + 4, data_size); -+ -+ if (!data) -+ return; -+ -+ info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO; -+ info.source.code = data; -+ info.source.size = data_size; -+ info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; -+ info.target_type = VKD3D_SHADER_TARGET_D3D_ASM; -+ info.options = options; -+ info.option_count = ARRAY_SIZE(options); -+ info.log_level = VKD3D_SHADER_LOG_INFO; -+ -+ if ((ret = vkd3d_shader_compile(&info, &output, NULL)) < 0) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "Failed to disassemble shader blob.\n"); -+ return; -+ } -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "asm {\n"); -+ -+ parse_fx_start_indent(parser); -+ -+ end = (const char *)output.code + output.size; -+ for (p = 
output.code; p < end; p = q) -+ { -+ if (!(q = memchr(p, '\n', end - p))) -+ q = end; -+ else -+ ++q; -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "%.*s", (int)(q - p), p); -+ } -+ -+ parse_fx_end_indent(parser); -+ -+ parse_fx_print_indent(parser); -+ vkd3d_string_buffer_printf(&parser->buffer, "}"); -+ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER && gs_so.sodecl) -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", -+ fx_4_get_string(parser, gs_so.sodecl)); -+ } -+ else if (object_type == FX_5_OBJECT_TYPE_GEOMETRY_SHADER) -+ { -+ for (unsigned int i = 0; i < ARRAY_SIZE(shader5.sodecl); ++i) -+ { -+ if (shader5.sodecl[i]) -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output %u declaration: \"%s\" */", -+ i, fx_4_get_string(parser, shader5.sodecl[i])); -+ } -+ if (shader5.sodecl_count) -+ vkd3d_string_buffer_printf(&parser->buffer, "\n/* Rasterized stream %u */", shader5.rast_stream); -+ } -+ -+ vkd3d_shader_free_shader_code(&output); -+} -+ -+static bool fx_4_is_shader_resource(const struct fx_4_binary_type *type) -+{ -+ switch (type->typeinfo) -+ { -+ case FX_4_OBJECT_TYPE_TEXTURE: -+ case FX_4_OBJECT_TYPE_TEXTURE_1D: -+ case FX_4_OBJECT_TYPE_TEXTURE_1DARRAY: -+ case FX_4_OBJECT_TYPE_TEXTURE_2D: -+ case FX_4_OBJECT_TYPE_TEXTURE_2DARRAY: -+ case FX_4_OBJECT_TYPE_TEXTURE_2DMS: -+ case FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY: -+ case FX_4_OBJECT_TYPE_TEXTURE_3D: -+ case FX_4_OBJECT_TYPE_TEXTURE_CUBE: -+ case FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY: -+ case FX_5_OBJECT_TYPE_UAV_1D: -+ case FX_5_OBJECT_TYPE_UAV_1DARRAY: -+ case FX_5_OBJECT_TYPE_UAV_2D: -+ case FX_5_OBJECT_TYPE_UAV_2DARRAY: -+ case FX_5_OBJECT_TYPE_UAV_3D: -+ case FX_5_OBJECT_TYPE_UAV_BUFFER: -+ case FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct fx_4_binary_type *type) -+{ -+ 
unsigned int i, element_count; -+ uint32_t value; -+ -+ vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); -+ element_count = max(type->element_count, 1); -+ for (i = 0; i < element_count; ++i) -+ { -+ switch (type->typeinfo) -+ { -+ case FX_4_OBJECT_TYPE_STRING: -+ vkd3d_string_buffer_printf(&parser->buffer, " "); -+ value = fx_parser_read_u32(parser); -+ fx_4_parse_string_initializer(parser, value); -+ break; -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: -+ parse_fx_start_indent(parser); -+ fx_4_parse_shader_initializer(parser, type->typeinfo); -+ parse_fx_end_indent(parser); -+ break; -+ default: -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Parsing object type %u is not implemented.", type->typeinfo); -+ return; -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, ",\n"); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, "}"); -+} -+ - static void fx_4_parse_objects(struct fx_parser *parser) - { - struct fx_4_object_variable -@@ -3173,9 +3389,9 @@ static void fx_4_parse_objects(struct fx_parser *parser) - uint32_t semantic; - uint32_t bind_point; - } var; -- uint32_t i, j, value, element_count; - struct fx_4_binary_type type; - const char *name, *type_name; -+ uint32_t i; - - if (parser->failed) - return; -@@ -3190,26 +3406,12 @@ static void fx_4_parse_objects(struct fx_parser *parser) - vkd3d_string_buffer_printf(&parser->buffer, "%s %s", type_name, name); - if (type.element_count) - vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); -- vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); - -- element_count = max(type.element_count, 1); -- for (j = 0; j < element_count; ++j) -- { -- switch (type.typeinfo) -- { -- case 
FX_4_OBJECT_TYPE_STRING: -- vkd3d_string_buffer_printf(&parser->buffer, " "); -- value = fx_parser_read_u32(parser); -- fx_4_parse_string_initializer(parser, value); -- break; -- default: -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -- "Parsing object type %u is not implemented.\n", type.typeinfo); -- return; -- } -- vkd3d_string_buffer_printf(&parser->buffer, ",\n"); -- } -- vkd3d_string_buffer_printf(&parser->buffer, "};\n"); -+ if (!fx_4_is_shader_resource(&type)) -+ fx_4_parse_object_initializer(parser, &type); -+ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); -+ -+ fx_parse_fx_4_annotations(parser); - } - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index cafff2fa878..1f90a4ba805 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2799,6 +2799,11 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - - case HLSL_CLASS_UAV: -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ vkd3d_string_buffer_printf(string, "RWByteAddressBuffer"); -+ return string; -+ } - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - vkd3d_string_buffer_printf(string, "RWBuffer"); - else if (type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) -@@ -4445,8 +4450,6 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - - rb_destroy(&ctx->functions, free_function_rb, NULL); - -- hlsl_block_cleanup(&ctx->static_initializers); -- - /* State blocks must be free before the variables, because they contain instructions that may - * refer to them. 
*/ - LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) -@@ -4462,6 +4465,8 @@ static void hlsl_ctx_cleanup(struct hlsl_ctx *ctx) - } - } - -+ hlsl_block_cleanup(&ctx->static_initializers); -+ - LIST_FOR_EACH_ENTRY_SAFE(scope, next_scope, &ctx->scopes, struct hlsl_scope, entry) - { - LIST_FOR_EACH_ENTRY_SAFE(var, next_var, &scope->vars, struct hlsl_ir_var, scope_entry) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index ae7f8c1c04f..f890784bb8f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -136,7 +136,8 @@ enum hlsl_sampler_dim - HLSL_SAMPLER_DIM_CUBEARRAY, - HLSL_SAMPLER_DIM_BUFFER, - HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, -- HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_STRUCTURED_BUFFER, -+ HLSL_SAMPLER_DIM_RAW_BUFFER, -+ HLSL_SAMPLER_DIM_MAX = HLSL_SAMPLER_DIM_RAW_BUFFER, - /* NOTE: Remember to update object_methods[] in hlsl.y if this enum is modified. */ - }; - -@@ -1394,6 +1395,7 @@ static inline unsigned int hlsl_sampler_dim_count(enum hlsl_sampler_dim dim) - { - case HLSL_SAMPLER_DIM_1D: - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return 1; - case HLSL_SAMPLER_DIM_1DARRAY: -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index ca983fc5ffd..18effcc5be1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -127,6 +127,7 @@ RenderTargetView {return KW_RENDERTARGETVIEW; } - return {return KW_RETURN; } - row_major {return KW_ROW_MAJOR; } - RWBuffer {return KW_RWBUFFER; } -+RWByteAddressBuffer {return KW_RWBYTEADDRESSBUFFER; } - RWStructuredBuffer {return KW_RWSTRUCTUREDBUFFER; } - RWTexture1D {return KW_RWTEXTURE1D; } - RWTexture1DArray {return KW_RWTEXTURE1DARRAY; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index cd938fd5906..dcbba46ede6 100644 
---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -332,6 +332,9 @@ static void check_condition_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node - { - const struct hlsl_type *type = cond->data_type; - -+ if (type->class == HLSL_CLASS_ERROR) -+ return; -+ - if (type->class > HLSL_CLASS_LAST_NUMERIC || type->dimx > 1 || type->dimy > 1) - { - struct vkd3d_string_buffer *string; -@@ -644,6 +647,9 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx - struct hlsl_block expr; - struct hlsl_src src; - -+ if (node_from_block(block)->data_type->class == HLSL_CLASS_ERROR) -+ return ret; -+ - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { - switch (node->type) -@@ -938,6 +944,9 @@ static bool add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, - { - struct hlsl_ir_node *store; - -+ if (return_value->data_type->class == HLSL_CLASS_ERROR) -+ return true; -+ - if (!(return_value = add_implicit_conversion(ctx, block, return_value, return_type, loc))) - return false; - -@@ -1234,7 +1243,8 @@ static bool add_record_access_recurse(struct hlsl_ctx *ctx, struct hlsl_block *b - } - - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED, "Field \"%s\" is not defined.", name); -- return false; -+ block->value = ctx->error_instr; -+ return true; - } - - static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, struct list *list) -@@ -5569,6 +5579,7 @@ static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) - case HLSL_SAMPLER_DIM_CUBE: - case HLSL_SAMPLER_DIM_CUBEARRAY: - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - /* Offset parameters not supported for these types. 
*/ - return 0; - default: -@@ -6186,32 +6197,85 @@ static bool add_sample_grad_method_call(struct hlsl_ctx *ctx, struct hlsl_block - return true; - } - -+static bool add_store_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *offset, *rhs, *store; -+ struct hlsl_deref resource_deref; -+ unsigned int value_dim; -+ -+ if (params->args_count != 2) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method '%s': expected 2.", name); -+ return false; -+ } -+ -+ if (!strcmp(name, "Store")) -+ value_dim = 1; -+ else if (!strcmp(name, "Store2")) -+ value_dim = 2; -+ else if (!strcmp(name, "Store3")) -+ value_dim = 3; -+ else -+ value_dim = 4; -+ -+ if (!(offset = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ -+ if (!(rhs = add_implicit_conversion(ctx, block, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim), loc))) -+ return false; -+ -+ if (!hlsl_init_deref_from_index_chain(ctx, &resource_deref, object)) -+ return false; -+ -+ if (!(store = hlsl_new_resource_store(ctx, &resource_deref, offset, rhs, loc))) -+ { -+ hlsl_cleanup_deref(&resource_deref); -+ return false; -+ } -+ -+ hlsl_block_add_instr(block, store); -+ hlsl_cleanup_deref(&resource_deref); -+ -+ return true; -+} -+ - static const struct method_function - { - const char *name; - bool (*handler)(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc); -- bool valid_dims[HLSL_SAMPLER_DIM_MAX + 1]; -+ char valid_dims[HLSL_SAMPLER_DIM_MAX + 1]; - } --object_methods[] = -+texture_methods[] = - { -- /* g c 1d 2d 3d cube 1darr 2darr 2dms 2dmsarr cubearr buff sbuff*/ -- { 
"Gather", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherAlpha", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherBlue", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherGreen", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -- { "GatherRed", add_gather_method_call, {0,0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0}}, -+ { "Gather", add_gather_method_call, "00010101001000" }, -+ { "GatherAlpha", add_gather_method_call, "00010101001000" }, -+ { "GatherBlue", add_gather_method_call, "00010101001000" }, -+ { "GatherGreen", add_gather_method_call, "00010101001000" }, -+ { "GatherRed", add_gather_method_call, "00010101001000" }, -+ -+ { "GetDimensions", add_getdimensions_method_call, "00111111111110" }, - -- { "GetDimensions", add_getdimensions_method_call, {0,0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}, -+ { "Load", add_load_method_call, "00111011110110" }, - -- { "Load", add_load_method_call, {0,0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1}}, -+ { "Sample", add_sample_method_call, "00111111001000" }, -+ { "SampleBias", add_sample_lod_method_call, "00111111001000" }, -+ { "SampleCmp", add_sample_cmp_method_call, "00111111001000" }, -+ { "SampleCmpLevelZero", add_sample_cmp_method_call, "00111111001000" }, -+ { "SampleGrad", add_sample_grad_method_call, "00111111001000" }, -+ { "SampleLevel", add_sample_lod_method_call, "00111111001000" }, -+}; - -- { "Sample", add_sample_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleBias", add_sample_lod_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleCmp", add_sample_cmp_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleCmpLevelZero", add_sample_cmp_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleGrad", add_sample_grad_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -- { "SampleLevel", add_sample_lod_method_call, {0,0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0}}, -+static const struct 
method_function uav_methods[] = -+{ -+ { "Store", add_store_method_call, "00000000000001" }, -+ { "Store2", add_store_method_call, "00000000000001" }, -+ { "Store3", add_store_method_call, "00000000000001" }, -+ { "Store4", add_store_method_call, "00000000000001" }, - }; - - static int object_method_function_name_compare(const void *a, const void *b) -@@ -6225,7 +6289,8 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - const struct hlsl_type *object_type = object->data_type; -- const struct method_function *method; -+ const struct method_function *method, *methods; -+ unsigned int count; - - if (object_type->class == HLSL_CLASS_ERROR) - { -@@ -6242,7 +6307,17 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - } - } - -- if (object_type->class != HLSL_CLASS_TEXTURE || object_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) -+ if (object_type->class == HLSL_CLASS_TEXTURE) -+ { -+ count = ARRAY_SIZE(texture_methods); -+ methods = texture_methods; -+ } -+ else if (object_type->class == HLSL_CLASS_UAV) -+ { -+ count = ARRAY_SIZE(uav_methods); -+ methods = uav_methods; -+ } -+ else - { - struct vkd3d_string_buffer *string; - -@@ -6253,10 +6328,10 @@ static bool add_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, stru - return false; - } - -- method = bsearch(name, object_methods, ARRAY_SIZE(object_methods), sizeof(*method), -+ method = bsearch(name, methods, count, sizeof(*method), - object_method_function_name_compare); - -- if (method && method->valid_dims[object_type->sampler_dim]) -+ if (method && method->valid_dims[object_type->sampler_dim] == '1') - { - return method->handler(ctx, block, object, name, params, loc); - } -@@ -6483,6 +6558,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_REGISTER - %token KW_ROW_MAJOR - %token KW_RWBUFFER 
-+%token KW_RWBYTEADDRESSBUFFER - %token KW_RWSTRUCTUREDBUFFER - %token KW_RWTEXTURE1D - %token KW_RWTEXTURE1DARRAY -@@ -7797,6 +7873,10 @@ type_no_void: - validate_uav_type(ctx, $1, $3, &@4); - $$ = hlsl_new_uav_type(ctx, $1, $3, true); - } -+ | KW_RWBYTEADDRESSBUFFER -+ { -+ $$ = hlsl_new_uav_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), false); -+ } - | KW_STRING - { - $$ = ctx->builtin_types.string; -@@ -8948,30 +9028,31 @@ postfix_expr: - { - if (!add_record_access_recurse(ctx, $1, $3, &@2)) - { -+ destroy_block($1); - vkd3d_free($3); - YYABORT; - } -- vkd3d_free($3); - } - else if (hlsl_is_numeric_type(node->data_type)) - { - struct hlsl_ir_node *swizzle; - -- if (!(swizzle = get_swizzle(ctx, node, $3, &@3))) -+ if ((swizzle = get_swizzle(ctx, node, $3, &@3))) -+ { -+ hlsl_block_add_instr($1, swizzle); -+ } -+ else - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid swizzle \"%s\".", $3); -- vkd3d_free($3); -- YYABORT; -+ $1->value = ctx->error_instr; - } -- hlsl_block_add_instr($1, swizzle); -- vkd3d_free($3); - } - else if (node->data_type->class != HLSL_CLASS_ERROR) - { - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid subscript \"%s\".", $3); -- vkd3d_free($3); -- YYABORT; -+ $1->value = ctx->error_instr; - } -+ vkd3d_free($3); - $$ = $1; - } - | postfix_expr '[' expr ']' -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 2cb56d6b493..ce431ee6815 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -4567,38 +4567,38 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - unsigned int first_write, unsigned int last_read, unsigned int reg_size, - unsigned int component_count, int mode, bool force_align) - { -+ struct hlsl_reg ret = {.allocation_size = 1, .allocated = true}; - unsigned int required_size = force_align ? 
4 : reg_size; -- unsigned int writemask = 0, pref; -- struct hlsl_reg ret = {0}; -- uint32_t reg_idx; -+ unsigned int pref; - - VKD3D_ASSERT(component_count <= reg_size); - - pref = allocator->prioritize_smaller_writemasks ? 4 : required_size; - for (; pref >= required_size; --pref) - { -- for (reg_idx = 0; pref == required_size || reg_idx < allocator->reg_count; ++reg_idx) -+ for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx) - { - unsigned int available_writemask = get_available_writemask(allocator, - first_write, last_read, reg_idx, mode); - - if (vkd3d_popcount(available_writemask) >= pref) - { -- writemask = hlsl_combine_writemasks(available_writemask, (1u << reg_size) - 1); -- break; -+ unsigned int writemask = hlsl_combine_writemasks(available_writemask, -+ vkd3d_write_mask_from_component_count(reg_size)); -+ -+ ret.id = reg_idx; -+ ret.writemask = hlsl_combine_writemasks(writemask, -+ vkd3d_write_mask_from_component_count(component_count)); -+ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); -+ return ret; - } - } -- if (writemask) -- break; - } - -- VKD3D_ASSERT(vkd3d_popcount(writemask) == reg_size); -- record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode); -- -- ret.id = reg_idx; -- ret.allocation_size = 1; -- ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); -- ret.allocated = true; -+ ret.id = allocator->reg_count; -+ ret.writemask = vkd3d_write_mask_from_component_count(component_count); -+ record_allocation(ctx, allocator, allocator->reg_count, -+ vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode); - return ret; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 802fe221747..fb7ce063c85 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -1913,7 +1913,11 @@ static uint32_t vkd3d_spirv_get_type_id(struct 
vkd3d_spirv_builder *builder, - uint32_t scalar_id, type_id; - - VKD3D_ASSERT(component_type < VKD3D_SHADER_COMPONENT_TYPE_COUNT); -- VKD3D_ASSERT(1 <= component_count && component_count <= VKD3D_VEC4_SIZE); -+ if (!component_count || component_count > VKD3D_VEC4_SIZE) -+ { -+ ERR("Invalid component count %u.\n", component_count); -+ return 0; -+ } - - if ((type_id = builder->numeric_type_ids[component_type][component_count - 1])) - return type_id; -@@ -3192,6 +3196,14 @@ static bool spirv_compiler_get_register_name(char *buffer, unsigned int buffer_s - case VKD3DSPR_CONSTBUFFER: - snprintf(buffer, buffer_size, "cb%u_%u", reg->idx[0].offset, reg->idx[1].offset); - break; -+ case VKD3DSPR_RASTOUT: -+ if (idx == VSIR_RASTOUT_POINT_SIZE) -+ { -+ snprintf(buffer, buffer_size, "oPts"); -+ break; -+ } -+ FIXME("Unhandled rastout register %#x.\n", idx); -+ return false; - case VKD3DSPR_INPUT: - snprintf(buffer, buffer_size, "v%u", idx); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 2198b828b7c..befe5eacf9c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -3491,6 +3491,7 @@ static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *typ - case HLSL_SAMPLER_DIM_CUBEARRAY: - return D3D_SRV_DIMENSION_TEXTURECUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return D3D_SRV_DIMENSION_BUFFER; - default: -@@ -4019,6 +4020,7 @@ static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_typ - case HLSL_SAMPLER_DIM_CUBEARRAY: - return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY; - case HLSL_SAMPLER_DIM_BUFFER: -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - return VKD3D_SM4_RESOURCE_BUFFER; - default: -@@ -4808,6 +4810,9 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct - instr.opcode = 
VKD3D_SM5_OP_DCL_UAV_STRUCTURED; - instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; - break; -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ instr.opcode = VKD3D_SM5_OP_DCL_UAV_RAW; -+ break; - default: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; - break; -@@ -5548,24 +5553,6 @@ static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_ - } - } - --static void write_sm4_store_uav_typed(const struct tpf_compiler *tpf, const struct hlsl_deref *dst, -- const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; -- -- sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, dst, &instr); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- - static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) - { - struct sm4_instruction instr; -@@ -6352,6 +6339,8 @@ static void write_sm4_resource_load(const struct tpf_compiler *tpf, const struct - static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struct hlsl_ir_resource_store *store) - { - struct hlsl_type *resource_type = hlsl_deref_get_type(tpf->ctx, &store->resource); -+ struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node; -+ struct sm4_instruction instr; - - if (!store->resource.var->is_uniform) - { -@@ -6365,7 +6354,25 @@ static void write_sm4_resource_store(const struct tpf_compiler *tpf, const struc - return; - } - -- write_sm4_store_uav_typed(tpf, &store->resource, store->coords.node, store->value.node); -+ memset(&instr, 0, sizeof(instr)); -+ -+ sm4_register_from_deref(tpf, &instr.dsts[0].reg, &instr.dsts[0].write_mask, 
&store->resource, &instr); -+ instr.dst_count = 1; -+ if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ instr.opcode = VKD3D_SM5_OP_STORE_RAW; -+ instr.dsts[0].write_mask = vkd3d_write_mask_from_component_count(value->data_type->dimx); -+ } -+ else -+ { -+ instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED; -+ } -+ -+ sm4_src_from_node(tpf, &instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL); -+ sm4_src_from_node(tpf, &instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); - } - - static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir_store *store) -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 54a39e18a0f..f2009a64bd2 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -1696,7 +1696,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; - VkPhysicalDevice physical_device = device->vk_physical_device; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -- VkExtensionProperties *vk_extensions; -+ VkExtensionProperties *vk_extensions = NULL; - VkPhysicalDeviceFeatures *features; - uint32_t vk_extension_count; - HRESULT hr; --- -2.45.2 - diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch deleted file mode 100644 index a52818d8..00000000 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch +++ /dev/null @@ -1,1338 +0,0 @@ -From 52aa786dbdf0eb459f69ac8058419df437ae0b20 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Tue, 5 Nov 2024 07:37:21 +1100 -Subject: [PATCH] Updated vkd3d to 794f4c30f4873841aaa0c5c9745ee732437e70db. 
- ---- - libs/vkd3d/libs/vkd3d-shader/fx.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 265 +++++++++--- - libs/vkd3d/libs/vkd3d-shader/ir.c | 397 +++++++++++++++--- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 92 +++- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 3 + - .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + - 6 files changed, 637 insertions(+), 127 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 8954feb22b7..5382dd94f98 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -3295,7 +3295,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int - - parse_fx_print_indent(parser); - vkd3d_string_buffer_printf(&parser->buffer, "}"); -- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER && gs_so.sodecl) -+ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) - { - vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", - fx_4_get_string(parser, gs_so.sodecl)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index ce431ee6815..213e403dcbd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -6579,7 +6579,43 @@ static void generate_vsir_signature(struct hlsl_ctx *ctx, - } - } - --static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) -+static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) -+{ -+ if (hlsl_version_lt(ctx, 4, 0)) -+ return VKD3D_DATA_FLOAT; -+ -+ if (type->class == HLSL_CLASS_ARRAY) -+ return vsir_data_type_from_hlsl_type(ctx, type->e.array.type); -+ if (type->class == HLSL_CLASS_STRUCT) -+ return VKD3D_DATA_MIXED; -+ if (type->class <= HLSL_CLASS_LAST_NUMERIC) -+ { -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ return 
VKD3D_DATA_DOUBLE; -+ case HLSL_TYPE_FLOAT: -+ return VKD3D_DATA_FLOAT; -+ case HLSL_TYPE_HALF: -+ return VKD3D_DATA_HALF; -+ case HLSL_TYPE_INT: -+ return VKD3D_DATA_INT; -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ return VKD3D_DATA_UINT; -+ } -+ } -+ -+ vkd3d_unreachable(); -+} -+ -+static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx, -+ const struct hlsl_ir_node *instr) -+{ -+ return vsir_data_type_from_hlsl_type(ctx, instr->data_type); -+} -+ -+static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) - { - uint32_t swizzle; - -@@ -6737,6 +6773,50 @@ static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( - return ins; - } - -+static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src, -+ struct hlsl_ctx *ctx, const struct hlsl_constant_value *value, -+ enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask) -+{ -+ unsigned int i, j; -+ -+ vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0); -+ if (width == 1) -+ { -+ src->reg.u.immconst_u32[0] = value->u[0].u; -+ return; -+ } -+ -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ for (i = 0, j = 0; i < 4; ++i) -+ { -+ if ((map_writemask & (1u << i)) && (j < width)) -+ src->reg.u.immconst_u32[i] = value->u[j++].u; -+ else -+ src->reg.u.immconst_u32[i] = 0; -+ } -+} -+ -+static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, -+ struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) -+{ -+ struct hlsl_ir_constant *constant; -+ -+ if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT) -+ { -+ /* In SM4 constants are inlined */ -+ constant = hlsl_ir_constant(instr); -+ vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, -+ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); -+ } -+ else -+ { -+ vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, 
instr), 1); -+ src->reg.idx[0].offset = instr->reg.id; -+ src->reg.dimension = VSIR_DIMENSION_VEC4; -+ src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); -+ } -+} -+ - static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_constant *constant) - { -@@ -6754,7 +6834,7 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = constant->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); -+ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); - - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -@@ -6763,9 +6843,9 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - } - - /* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ --static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, -- struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, -- bool map_src_swizzles) -+static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, -+ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) - { - struct hlsl_ir_node *instr = &expr->node; - struct vkd3d_shader_dst_param *dst_param; -@@ -6786,8 +6866,9 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s - return; - - dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); - dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; - dst_param->write_mask = instr->reg.writemask; - dst_param->modifiers = dst_mod; - -@@ -6796,9 +6877,7 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s - struct hlsl_ir_node *operand = expr->operands[i].node; - - src_param = &ins->src[i]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = operand->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, -+ vsir_src_from_hlsl_node(src_param, ctx, operand, - map_src_swizzles ? 
dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); - src_param->modifiers = src_mod; - } -@@ -6820,7 +6899,7 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx - VKD3D_ASSERT(instr->reg.allocated); - VKD3D_ASSERT(operand); - -- src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); -+ src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); - for (i = 0; i < 4; ++i) - { - if (instr->reg.writemask & (1u << i)) -@@ -6866,7 +6945,7 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi - src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = operand->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); - - if (ctx->profile->major_version < 3) - { -@@ -6908,13 +6987,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); - return true; - - case HLSL_TYPE_DOUBLE: - if (ctx->double_as_float_alias) - { -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); - return true; - } - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -@@ -6939,7 +7018,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, 
VKD3DSIH_MOV, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); - return true; - - case HLSL_TYPE_BOOL: -@@ -6961,7 +7040,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, - case HLSL_TYPE_FLOAT: - if (ctx->double_as_float_alias) - { -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); - return true; - } - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -@@ -7001,7 +7080,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - switch (expr->op) - { - case HLSL_OP1_ABS: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); - break; - - case HLSL_OP1_CAST: -@@ -7013,11 +7092,11 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - break; - - case HLSL_OP1_DSX: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); - break; - - case HLSL_OP1_DSY: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); - break; - - case HLSL_OP1_EXP2: -@@ -7029,7 +7108,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - break; - - case HLSL_OP1_NEG: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); - break; - - case HLSL_OP1_RCP: -@@ -7037,7 +7116,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct 
vsir_progr - break; - - case HLSL_OP1_REINTERPRET: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); - break; - - case HLSL_OP1_RSQ: -@@ -7045,7 +7124,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - break; - - case HLSL_OP1_SAT: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); - break; - - case HLSL_OP1_SIN_REDUCED: -@@ -7054,18 +7133,18 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - break; - - case HLSL_OP2_ADD: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); - break; - - case HLSL_OP2_DOT: - switch (expr->operands[0].node->data_type->dimx) - { - case 3: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); - break; - - case 4: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); - break; - - default: -@@ -7075,43 +7154,43 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr - break; - - case HLSL_OP2_MAX: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); - break; - - case HLSL_OP2_MIN: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); -+ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); - break; - - case HLSL_OP2_MUL: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); - break; - - case HLSL_OP1_FRACT: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); - break; - - case HLSL_OP2_LOGIC_AND: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); - break; - - case HLSL_OP2_LOGIC_OR: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); - break; - - case HLSL_OP2_SLT: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); - break; - - case HLSL_OP3_CMP: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); - break; - - case HLSL_OP3_DP2ADD: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); - break; - - case HLSL_OP3_MAD: -- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); - break; - - default: -@@ -7227,7 +7306,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, - - 
vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = register_index; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask); -+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); - - if (deref->rel_offset.node) - hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); -@@ -7306,9 +7385,7 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - dst_param->write_mask = instr->reg.writemask; - - src_param = &ins->src[0]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = coords->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); - - sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, - VKD3DSP_WRITEMASK_ALL, &ins->location); -@@ -7316,19 +7393,15 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, - if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) - { - src_param = &ins->src[2]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = ddx->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL); - - src_param = &ins->src[3]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = ddy->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); -+ vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL); - } - } - --static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, -- struct hlsl_ir_swizzle *swizzle_instr) -+static void 
generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) - { - struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; - struct vkd3d_shader_dst_param *dst_param; -@@ -7342,8 +7415,9 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr - return; - - dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); - dst_param->reg.idx[0].offset = instr->reg.id; -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; - dst_param->write_mask = instr->reg.writemask; - - swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); -@@ -7352,8 +7426,10 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr - swizzle = vsir_swizzle_from_hlsl(swizzle); - - src_param = &ins->src[0]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); -+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); - src_param->reg.idx[0].offset = val->reg.id; -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; - src_param->swizzle = swizzle; - } - -@@ -7371,9 +7447,7 @@ static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_prog - sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); - - src_param = &ins->src[0]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = rhs->reg.id; -- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask); -+ vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask); - } - - static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, -@@ -7408,7 +7482,6 @@ static void 
sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program - struct vkd3d_shader_src_param *src_param; - struct hlsl_ir_node *instr = &iff->node; - struct vkd3d_shader_instruction *ins; -- uint32_t swizzle; - - if (hlsl_version_lt(ctx, 2, 1)) - { -@@ -7421,19 +7494,12 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program - return; - ins->flags = VKD3D_SHADER_REL_OP_NE; - -- swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask); -- swizzle = vsir_swizzle_from_hlsl(swizzle); -- - src_param = &ins->src[0]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = condition->reg.id; -- src_param->swizzle = swizzle; -+ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); - src_param->modifiers = 0; - - src_param = &ins->src[1]; -- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -- src_param->reg.idx[0].offset = condition->reg.id; -- src_param->swizzle = swizzle; -+ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); - src_param->modifiers = VKD3DSPSM_NEG; - - sm1_generate_vsir_block(ctx, &iff->then_block, program); -@@ -7496,7 +7562,7 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo - break; - - case HLSL_IR_SWIZZLE: -- sm1_generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); - break; - - default: -@@ -7557,6 +7623,25 @@ static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_progr - hlsl_block_add_instr(block, vsir_instr); - } - -+static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_node *instr) -+{ -+ struct vkd3d_shader_location *loc; -+ struct hlsl_ir_node *vsir_instr; -+ -+ loc = &program->instructions.elements[program->instructions.count - 1].location; -+ -+ if (!(vsir_instr = 
hlsl_new_vsir_instruction_ref(ctx, -+ program->instructions.count - 1, instr->data_type, &instr->reg, loc))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ list_add_before(&instr->entry, &vsir_instr->entry); -+ hlsl_replace_node(instr, vsir_instr); -+} -+ - static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, - uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) - { -@@ -7589,6 +7674,60 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, - add_last_vsir_instr_to_block(ctx, program, block); - } - -+static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ switch (expr->op) -+ { -+ case HLSL_OP1_ABS: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+{ -+ struct hlsl_ir_node *instr, *next; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->data_type) -+ { -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -+ break; -+ } -+ } -+ -+ switch (instr->type) -+ { -+ case HLSL_IR_CALL: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_CONSTANT: -+ /* In SM4 all constants are inlined. 
*/ -+ break; -+ -+ case HLSL_IR_EXPR: -+ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr))) -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ default: -+ break; -+ } -+ } -+} -+ - static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) - { -@@ -7631,6 +7770,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, - list_move_head(&func->body.instrs, &block.instrs); - - hlsl_block_cleanup(&block); -+ -+ sm4_generate_vsir_block(ctx, &func->body, program); - } - - /* OBJECTIVE: Translate all the information from ctx and entry_func to the -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 19dc36d9191..9b50a308e11 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -129,6 +129,38 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( - return NULL; - } - -+static struct signature_element *vsir_signature_find_element_by_name( -+ const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index) -+{ -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name) -+ && signature->elements[i].semantic_index == semantic_index) -+ return &signature->elements[i]; -+ } -+ -+ return NULL; -+} -+ -+static bool vsir_signature_find_sysval(const struct shader_signature *signature, -+ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index) -+{ -+ const struct signature_element *e; -+ unsigned int i; -+ -+ for (i = 0; i < signature->element_count; ++i) -+ { -+ e = &signature->elements[i]; -+ if (e->sysval_semantic == 
sysval && e->semantic_index == semantic_index) -+ { -+ *element_index = i; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ - void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, - enum vkd3d_data_type data_type, unsigned int idx_count) - { -@@ -277,6 +309,15 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne - dst->reg.idx[0].offset = idx; - } - -+static void dst_param_init_output(struct vkd3d_shader_dst_param *dst, -+ enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask) -+{ -+ vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1); -+ dst->reg.idx[0].offset = idx; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst->write_mask = write_mask; -+} -+ - void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode) - { -@@ -789,6 +830,98 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i - shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); - } - -+/* Ensure that the program closes with a ret. sm1 programs do not, by default. -+ * Many of our IR passes rely on this in order to insert instructions at the -+ * end of execution. 
*/ -+static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ static const struct vkd3d_shader_location no_loc; -+ if (program->instructions.count -+ && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET) -+ return VKD3D_OK; -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET); -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ struct shader_signature *signature = &program->output_signature; -+ struct signature_element *new_elements, *e; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) -+ return VKD3D_OK; -+ -+ if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0))) -+ { -+ program->diffuse_written_mask = e->mask; -+ e->mask = VKD3DSP_WRITEMASK_ALL; -+ -+ return VKD3D_OK; -+ } -+ -+ if (!(new_elements = vkd3d_realloc(signature->elements, -+ (signature->element_count + 1) * sizeof(*signature->elements)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ signature->elements = new_elements; -+ e = &signature->elements[signature->element_count++]; -+ memset(e, 0, sizeof(*e)); -+ e->semantic_name = vkd3d_strdup("COLOR"); -+ e->sysval_semantic = VKD3D_SHADER_SV_NONE; -+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ e->register_count = 1; -+ e->mask = VKD3DSP_WRITEMASK_ALL; -+ e->used_mask = VKD3DSP_WRITEMASK_ALL; -+ e->register_index = SM1_COLOR_REGISTER_OFFSET; -+ e->target_location = SM1_COLOR_REGISTER_OFFSET; -+ e->interpolation_mode = VKD3DSIM_NONE; -+ -+ return VKD3D_OK; -+} -+ -+/* Uninitialized components of diffuse yield 1.0 in SM1-2. 
Implement this by -+ * always writing diffuse in those versions, even if the PS doesn't read it. */ -+static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program, -+ struct vsir_transformation_context *ctx) -+{ -+ static const struct vkd3d_shader_location no_loc; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -+ || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL) -+ return VKD3D_OK; -+ -+ /* Write the instruction after all LABEL, DCL, and NOP instructions. -+ * We need to skip NOP instructions because they might result from removed -+ * DCLs, and there could still be DCLs after NOPs. */ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) -+ break; -+ } -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &program->instructions.elements[i]; -+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = 0; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask; -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ for (i = 0; i < 4; ++i) -+ ins->src[0].reg.u.immconst_f32[i] = 1.0f; -+ return VKD3D_OK; -+} -+ - static const struct vkd3d_shader_varying_map *find_varying_map( - const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) - { -@@ -803,14 +936,88 @@ static const struct vkd3d_shader_varying_map *find_varying_map( - return NULL; - } - -+static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info) 
-+{ -+ const struct vkd3d_shader_spirv_target_info *spirv_info; -+ enum vkd3d_shader_spirv_environment environment; -+ -+ switch (info->target_type) -+ { -+ case VKD3D_SHADER_TARGET_SPIRV_BINARY: -+ spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO); -+ environment = spirv_info ? spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; -+ -+ switch (environment) -+ { -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: -+ return true; -+ -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: -+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: -+ /* FIXME: Allow KHR_maintenance4. */ -+ return false; -+ -+ default: -+ FIXME("Unrecognized environment %#x.\n", environment); -+ return false; -+ } -+ -+ default: -+ return true; -+ } -+} -+ -+static void remove_unread_output_components(const struct shader_signature *signature, -+ struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst) -+{ -+ const struct signature_element *e; -+ -+ switch (dst->reg.type) -+ { -+ case VKD3DSPR_OUTPUT: -+ e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); -+ break; -+ -+ case VKD3DSPR_ATTROUT: -+ e = vsir_signature_find_element_for_reg(signature, -+ SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); -+ break; -+ -+ case VKD3DSPR_RASTOUT: -+ e = vsir_signature_find_element_for_reg(signature, -+ SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); -+ break; -+ -+ default: -+ return; -+ } -+ -+ /* We already changed the mask earlier. 
*/ -+ dst->write_mask &= e->mask; -+ -+ if (!dst->write_mask) -+ { -+ if (ins->dst_count == 1) -+ vkd3d_shader_instruction_make_nop(ins); -+ else -+ vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); -+ } -+} -+ - static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, - struct vsir_transformation_context *ctx) - { - const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; - struct vkd3d_shader_message_context *message_context = ctx->message_context; - const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; -+ bool allows_subset_masks = target_allows_subset_masks(compile_info); - struct shader_signature *signature = &program->output_signature; -+ unsigned int orig_element_count = signature->element_count; - const struct vkd3d_shader_varying_map_info *varying_map; -+ struct signature_element *new_elements, *e; -+ unsigned int uninit_varying_count = 0; -+ unsigned int subset_varying_count = 0; -+ unsigned int new_register_count = 0; - unsigned int i; - - if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) -@@ -819,22 +1026,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program - for (i = 0; i < signature->element_count; ++i) - { - const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); -- struct signature_element *e = &signature->elements[i]; - -+ e = &signature->elements[i]; - if (map) - { - unsigned int input_mask = map->input_mask; - - e->target_location = map->input_register_index; - -- /* It is illegal in Vulkan if the next shader uses the same varying -- * location with a different mask. 
*/ -- if (input_mask && input_mask != e->mask) -+ if ((input_mask & e->mask) == input_mask) -+ { -+ ++subset_varying_count; -+ if (!allows_subset_masks) -+ { -+ e->mask = input_mask; -+ e->used_mask &= input_mask; -+ } -+ } -+ else if (input_mask && input_mask != e->mask) - { - vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " -- "Output mask %#x does not match input mask %#x.", -- e->mask, input_mask); -+ "Input mask %#x reads components not written in output mask %#x.", -+ input_mask, e->mask); - return VKD3D_ERROR_NOT_IMPLEMENTED; - } - } -@@ -842,17 +1056,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program - { - e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; - } -+ -+ new_register_count = max(new_register_count, e->register_index + 1); - } - -+ /* Handle uninitialized varyings by writing them before every ret. -+ * -+ * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized -+ * varyings contain. -+ * -+ * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2. -+ * In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1). -+ * -+ * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1). -+ * WARP writes (1, 1, 1, 1). -+ * -+ * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1). -+ * -+ * We don't have enough knowledge to identify diffuse here. Instead we deal -+ * with that in vsir_program_ensure_diffuse(), by always writing diffuse if -+ * the shader doesn't. 
-+ */ -+ - for (i = 0; i < varying_map->varying_count; ++i) - { - if (varying_map->varying_map[i].output_signature_index >= signature->element_count) -+ ++uninit_varying_count; -+ } -+ -+ if (!(new_elements = vkd3d_realloc(signature->elements, -+ (signature->element_count + uninit_varying_count) * sizeof(*signature->elements)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ signature->elements = new_elements; -+ -+ for (i = 0; i < varying_map->varying_count; ++i) -+ { -+ const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i]; -+ -+ if (map->output_signature_index < orig_element_count) -+ continue; -+ -+ TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n", -+ map->input_register_index, map->input_mask); -+ e = &signature->elements[signature->element_count++]; -+ memset(e, 0, sizeof(*e)); -+ e->sysval_semantic = VKD3D_SHADER_SV_NONE; -+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ e->register_count = 1; -+ e->mask = map->input_mask; -+ e->used_mask = map->input_mask; -+ e->register_index = new_register_count++; -+ e->target_location = map->input_register_index; -+ e->interpolation_mode = VKD3DSIM_LINEAR; -+ } -+ -+ /* Write each uninitialized varying before each ret. 
*/ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ struct vkd3d_shader_location loc; -+ -+ if (ins->opcode != VKD3DSIH_RET) -+ continue; -+ -+ loc = ins->location; -+ if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[i]; -+ -+ for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) - { -- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -- "Aborting due to not yet implemented feature: " -- "The next stage consumes varyings not written by this stage."); -- return VKD3D_ERROR_NOT_IMPLEMENTED; -+ e = &signature->elements[j]; -+ -+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); -+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask); -+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ++ins; - } -+ -+ i += uninit_varying_count; -+ } -+ -+ /* Vulkan (without KHR_maintenance4) disallows any mismatching masks, -+ * including when the input mask is a proper subset of the output mask. -+ * Resolve this by rewriting the shader to remove unread components from -+ * any writes to the output variable. 
*/ -+ -+ if (!subset_varying_count || allows_subset_masks) -+ return VKD3D_OK; -+ -+ for (i = 0; i < program->instructions.count; ++i) -+ { -+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; -+ -+ for (unsigned int j = 0; j < ins->dst_count; ++j) -+ remove_unread_output_components(signature, ins, &ins->dst[j]); - } - - return VKD3D_OK; -@@ -2490,15 +2790,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla - static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, - struct vkd3d_shader_message_context *message_context) - { -- bool main_block_open, is_hull_shader, after_declarations_section; - struct vkd3d_shader_instruction_array *instructions; - struct vsir_program *program = flattener->program; -+ bool is_hull_shader, after_declarations_section; - struct vkd3d_shader_instruction *dst_ins; - size_t i; - - instructions = &program->instructions; - is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL; -- main_block_open = !is_hull_shader; - after_declarations_section = is_hull_shader; - - if (!cf_flattener_require_space(flattener, instructions->count + 1)) -@@ -2822,8 +3121,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - - if (cf_info) - cf_info->inside_block = false; -- else -- main_block_open = false; - break; - - default: -@@ -2833,14 +3130,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - } - } - -- if (main_block_open) -- { -- if (!(dst_ins = cf_flattener_require_space(flattener, 1))) -- return VKD3D_ERROR_OUT_OF_MEMORY; -- vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET); -- ++flattener->instruction_count; -- } -- - return flattener->status; - } - -@@ -5564,21 +5853,6 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - return VKD3D_OK; - } - --static bool find_colour_signature_idx(const struct shader_signature 
*signature, uint32_t *index) --{ -- for (unsigned int i = 0; i < signature->element_count; ++i) -- { -- if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET -- && !signature->elements[i].register_index) -- { -- *index = i; -- return true; -- } -- } -- -- return false; --} -- - static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, - const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, - const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, -@@ -5690,7 +5964,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro - if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - return VKD3D_OK; - -- if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) -+ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx) - || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) - return VKD3D_OK; - -@@ -5808,21 +6082,6 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog - return VKD3D_OK; - } - --static bool find_sysval_signature_idx(const struct shader_signature *signature, -- enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) --{ -- for (unsigned int i = 0; i < signature->element_count; ++i) -- { -- if (signature->elements[i].sysval_semantic == sysval) -- { -- *idx = i; -- return true; -- } -- } -- -- return false; --} -- - static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program, - struct vsir_transformation_context *ctx) - { -@@ -5878,7 +6137,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr - } - } - -- if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) -+ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx)) - { - 
vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, - "Shader does not write position."); -@@ -7988,6 +8247,31 @@ static void vsir_transform_( - } - } - -+/* Transformations which should happen at parse time, i.e. before scan -+ * information is returned to the user. -+ * -+ * In particular, some passes need to modify the signature, and -+ * vkd3d_shader_scan() should report the modified signature for the given -+ * target. */ -+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) -+{ -+ struct vsir_transformation_context ctx = -+ { -+ .result = VKD3D_OK, -+ .program = program, -+ .config_flags = config_flags, -+ .compile_info = compile_info, -+ .message_context = message_context, -+ }; -+ -+ /* For vsir_program_ensure_diffuse(). */ -+ if (program->shader_version.major <= 2) -+ vsir_transform(&ctx, vsir_program_add_diffuse_output); -+ -+ return ctx.result; -+} -+ - enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) - { -@@ -8012,6 +8296,11 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t - } - else - { -+ vsir_transform(&ctx, vsir_program_ensure_ret); -+ -+ if (program->shader_version.major <= 2) -+ vsir_transform(&ctx, vsir_program_ensure_diffuse); -+ - if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) - vsir_transform(&ctx, vsir_program_remap_output_signature); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index befe5eacf9c..bbd2f761d29 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -674,6 +674,7 @@ struct sm4_index_range_array - struct vkd3d_sm4_lookup_tables - { - const struct 
vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; -+ const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT]; - const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; - const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; - const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; -@@ -1412,6 +1413,8 @@ struct tpf_compiler - struct vkd3d_sm4_lookup_tables lookup; - struct sm4_stat *stat; - -+ int result; -+ - struct vkd3d_bytecode_buffer *buffer; - struct dxbc_writer dxbc; - }; -@@ -1903,6 +1906,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - const struct vkd3d_sm4_opcode_info *info = &opcode_table[i]; - - lookup->opcode_info_from_sm4[info->opcode] = info; -+ lookup->opcode_info_from_vsir[info->handler_idx] = info; - } - - for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) -@@ -1929,6 +1933,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( - return lookup->opcode_info_from_sm4[sm4_opcode]; - } - -+static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode( -+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode) -+{ -+ if (vsir_opcode >= VKD3DSIH_COUNT) -+ return NULL; -+ return lookup->opcode_info_from_vsir[vsir_opcode]; -+} -+ -+static unsigned int opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info) -+{ -+ return strnlen(info->dst_info, SM4_MAX_DST_COUNT); -+} -+ -+static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info) -+{ -+ return strnlen(info->src_info, SM4_MAX_SRC_COUNT); -+} -+ - static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( - const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) - { -@@ -2651,8 +2673,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - ins->raw = false; 
- ins->structured = false; - ins->predicate = NULL; -- ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); -- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); -+ ins->dst_count = opcode_info_get_dst_count(opcode_info); -+ ins->src_count = opcode_info_get_src_count(opcode_info); - ins->src = src_params = vsir_program_get_src_params(program, ins->src_count); - if (!src_params && ins->src_count) - { -@@ -2971,7 +2993,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - return VKD3D_OK; - } - --static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); -+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); - - static bool type_is_integer(const struct hlsl_type *type) - { -@@ -6094,11 +6116,12 @@ static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_ - hlsl_release_string_buffer(tpf->ctx, dst_type_string); - } - --static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) -+static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, -+ .opcode = VKD3D_SM4_OP_IF, -+ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, - .src_count = 1, - }; - -@@ -6210,7 +6233,7 @@ static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_ - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) -+static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) - { - struct sm4_instruction instr = - { -@@ -6394,7 +6417,7 @@ static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) -+static void 
write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) - { - const struct hlsl_ir_node *selector = s->selector.node; - struct hlsl_ir_switch_case *c; -@@ -6455,7 +6478,46 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ - write_sm4_instruction(tpf, &instr); - } - --static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) -+static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) -+{ -+ const struct vkd3d_sm4_opcode_info *info; -+ struct sm4_instruction instr = {0}; -+ unsigned int dst_count, src_count; -+ -+ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); -+ VKD3D_ASSERT(info); -+ -+ dst_count = opcode_info_get_dst_count(info); -+ src_count = opcode_info_get_src_count(info); -+ -+ if (ins->dst_count != dst_count) -+ { -+ ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n", -+ ins->dst_count, ins->opcode, dst_count); -+ tpf->result = VKD3D_ERROR_INVALID_SHADER; -+ return; -+ } -+ if (ins->src_count != src_count) -+ { -+ ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n", -+ ins->src_count, ins->opcode, src_count); -+ tpf->result = VKD3D_ERROR_INVALID_SHADER; -+ return; -+ } -+ -+ instr.opcode = info->opcode; -+ instr.dst_count = ins->dst_count; -+ instr.src_count = ins->src_count; -+ -+ for (unsigned int i = 0; i < ins->dst_count; ++i) -+ instr.dsts[i] = ins->dst[i]; -+ for (unsigned int i = 0; i < ins->src_count; ++i) -+ instr.srcs[i] = ins->src[i]; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ -+static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) - { - switch (ins->opcode) - { -@@ -6467,13 +6529,17 @@ static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct - tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); - break; - -+ case VKD3DSIH_MOV: -+ 
tpf_simple_instruction(tpf, ins); -+ break; -+ - default: - vkd3d_unreachable(); - break; - } - } - --static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) -+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) - { - const struct hlsl_ir_node *instr; - unsigned int vsir_instr_idx; -@@ -6765,7 +6831,13 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, - tpf_write_sfi0(&tpf); - tpf_write_stat(&tpf); - -- if (!(ret = ctx->result)) -+ ret = VKD3D_OK; -+ if (ctx->result) -+ ret = ctx->result; -+ if (tpf.result) -+ ret = tpf.result; -+ -+ if (!ret) - ret = dxbc_writer_write(&tpf.dxbc, out); - for (i = 0; i < tpf.dxbc.section_count; ++i) - vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 3355e18b88e..3afac9a38a4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -719,8 +719,11 @@ static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *comp - vsir_program_trace(program); - - vsir_program_cleanup(program); -+ return ret; - } - -+ if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE) -+ ret = vsir_program_transform_early(program, config_flags, compile_info, message_context); - return ret; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 5ae938e0525..1b6c37343d1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -585,6 +585,8 @@ enum vkd3d_shader_opcode - VKD3DSIH_XOR, - - VKD3DSIH_INVALID, -+ -+ VKD3DSIH_COUNT, - }; - - enum vkd3d_shader_register_type -@@ -1424,6 +1426,7 @@ struct vsir_program - bool use_vocp; - bool has_point_size; - bool has_point_coord; -+ uint8_t diffuse_written_mask; - enum 
vsir_control_flow_type cf_type; - enum vsir_normalisation_level normalisation_level; - -@@ -1442,6 +1445,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c - enum vsir_normalisation_level normalisation_level); - enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); -+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, - const char *source_name, struct vkd3d_shader_message_context *message_context); - struct vkd3d_shader_src_param *vsir_program_create_outpointid_param( --- -2.45.2 - diff --git a/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch b/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch deleted file mode 100644 index a45d352f..00000000 --- a/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch +++ /dev/null @@ -1,3253 +0,0 @@ -From a38de601ec795892cf1b281f11f4320c65518774 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 7 Nov 2024 12:23:53 +1100 -Subject: [PATCH] Updated vkd3d to 756b98f093ba26e8cd4d4fed1caa04a5c0d0bc35. 
- ---- - libs/vkd3d/libs/vkd3d-shader/fx.c | 47 +- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 133 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 6 + - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 113 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 954 ++++++++++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 25 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 11 +- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 1182 +++-------------- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 + - libs/vkd3d/libs/vkd3d/command.c | 30 +- - 11 files changed, 1392 insertions(+), 1111 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 5382dd94f98..9b1ef3bb2e0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -514,6 +514,8 @@ enum fx_4_type_constants - FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf, - FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10, - FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11, -+ FX_4_OBJECT_TYPE_RTV = 0x13, -+ FX_4_OBJECT_TYPE_DSV = 0x14, - FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17, - - FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b, -@@ -527,7 +529,12 @@ enum fx_4_type_constants - FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22, - FX_5_OBJECT_TYPE_UAV_3D = 0x23, - FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24, -+ FX_5_OBJECT_TYPE_SRV_RAW_BUFFER = 0x25, -+ FX_5_OBJECT_TYPE_UAV_RAW_BUFFER = 0x26, -+ FX_5_OBJECT_TYPE_SRV_STRUCTURED_BUFFER = 0x27, - FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28, -+ FX_5_OBJECT_TYPE_SRV_APPEND_STRUCTURED_BUFFER = 0x2b, -+ FX_5_OBJECT_TYPE_SRV_CONSUME_STRUCTURED_BUFFER = 0x2c, - - /* Types */ - FX_4_TYPE_CLASS_NUMERIC = 1, -@@ -613,6 +620,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - [HLSL_SAMPLER_DIM_3D] = "RWTexture3D", - [HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer", - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", -+ [HLSL_SAMPLER_DIM_RAW_BUFFER] = "RWByteAddressBuffer", - }; - - switch (type->class) -@@ -821,17 +829,18 @@ static 
uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - [HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D, - [HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER, - [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER, -+ [HLSL_SAMPLER_DIM_RAW_BUFFER] = FX_5_OBJECT_TYPE_UAV_RAW_BUFFER, - }; - - put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); - } - else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) - { -- put_u32_unaligned(buffer, 20); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV); - } - else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) - { -- put_u32_unaligned(buffer, 19); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV); - } - else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) - { -@@ -3315,27 +3324,19 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int - vkd3d_shader_free_shader_code(&output); - } - --static bool fx_4_is_shader_resource(const struct fx_4_binary_type *type) -+static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) - { - switch (type->typeinfo) - { -- case FX_4_OBJECT_TYPE_TEXTURE: -- case FX_4_OBJECT_TYPE_TEXTURE_1D: -- case FX_4_OBJECT_TYPE_TEXTURE_1DARRAY: -- case FX_4_OBJECT_TYPE_TEXTURE_2D: -- case FX_4_OBJECT_TYPE_TEXTURE_2DARRAY: -- case FX_4_OBJECT_TYPE_TEXTURE_2DMS: -- case FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY: -- case FX_4_OBJECT_TYPE_TEXTURE_3D: -- case FX_4_OBJECT_TYPE_TEXTURE_CUBE: -- case FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY: -- case FX_5_OBJECT_TYPE_UAV_1D: -- case FX_5_OBJECT_TYPE_UAV_1DARRAY: -- case FX_5_OBJECT_TYPE_UAV_2D: -- case FX_5_OBJECT_TYPE_UAV_2DARRAY: -- case FX_5_OBJECT_TYPE_UAV_3D: -- case FX_5_OBJECT_TYPE_UAV_BUFFER: -- case FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER: -+ case FX_4_OBJECT_TYPE_STRING: -+ case FX_4_OBJECT_TYPE_PIXEL_SHADER: -+ case FX_4_OBJECT_TYPE_VERTEX_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO: -+ case 
FX_5_OBJECT_TYPE_GEOMETRY_SHADER: -+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER: -+ case FX_5_OBJECT_TYPE_HULL_SHADER: -+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER: - return true; - default: - return false; -@@ -3347,6 +3348,9 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct - unsigned int i, element_count; - uint32_t value; - -+ if (!fx_4_object_has_initializer(type)) -+ return; -+ - vkd3d_string_buffer_printf(&parser->buffer, " = {\n"); - element_count = max(type->element_count, 1); - for (i = 0; i < element_count; ++i) -@@ -3407,8 +3411,7 @@ static void fx_4_parse_objects(struct fx_parser *parser) - if (type.element_count) - vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count); - -- if (!fx_4_is_shader_resource(&type)) -- fx_4_parse_object_initializer(parser, &type); -+ fx_4_parse_object_initializer(parser, &type); - vkd3d_string_buffer_printf(&parser->buffer, ";\n"); - - fx_parse_fx_4_annotations(parser); -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 363054cb6d9..0df0e30f399 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -763,16 +763,37 @@ static void shader_glsl_default(struct vkd3d_glsl_generator *gen) - vkd3d_string_buffer_printf(gen->buffer, "default:\n"); - } - -+static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen, -+ unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset) -+{ -+ switch (offset_size) -+ { -+ case 1: -+ vkd3d_string_buffer_printf(buffer, "%d", offset->u); -+ break; -+ case 2: -+ vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v); -+ break; -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Invalid texel offset size %u.", offset_size); -+ /* fall through */ -+ case 3: -+ vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", offset->u, offset->v, 
offset->w); -+ break; -+ } -+} -+ - static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -+ unsigned int resource_id, resource_idx, resource_space, sample_count; - const struct glsl_resource_type_info *resource_type_info; -- unsigned int resource_id, resource_idx, resource_space; - const struct vkd3d_shader_descriptor_info1 *d; - enum vkd3d_shader_component_type sampled_type; - enum vkd3d_shader_resource_type resource_type; - struct vkd3d_string_buffer *fetch; - enum vkd3d_data_type data_type; -- struct glsl_src coord, lod; -+ struct glsl_src coord; - struct glsl_dst dst; - uint32_t coord_mask; - -@@ -790,6 +811,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ - { - resource_type = d->resource_type; - resource_space = d->register_space; -+ sample_count = d->sample_count; - sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type); - data_type = vkd3d_data_type_from_component_type(sampled_type); - } -@@ -799,6 +821,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ - "Internal compiler error: Undeclared resource descriptor %u.", resource_id); - resource_space = 0; - resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ sample_count = 1; - data_type = VKD3D_DATA_FLOAT; - } - -@@ -815,7 +838,6 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ - - glsl_dst_init(&dst, gen, ins, &ins->dst[0]); - glsl_src_init(&coord, gen, &ins->src[0], coord_mask); -- glsl_src_init(&lod, gen, &ins->src[0], VKD3DSP_WRITEMASK_3); - fetch = vkd3d_string_buffer_get(&gen->string_buffers); - - vkd3d_string_buffer_printf(fetch, "texelFetch("); -@@ -823,14 +845,23 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_ - resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0); - vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer); - if (resource_type != 
VKD3D_SHADER_RESOURCE_BUFFER) -- vkd3d_string_buffer_printf(fetch, ", %s", lod.str->buffer); -+ { -+ vkd3d_string_buffer_printf(fetch, ", "); -+ if (ins->opcode != VKD3DSIH_LD2DMS) -+ shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type); -+ else if (sample_count == 1) -+ /* If the resource isn't a true multisample resource, this is the -+ * "lod" parameter instead of the "sample" parameter. */ -+ vkd3d_string_buffer_printf(fetch, "0"); -+ else -+ shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type); -+ } - vkd3d_string_buffer_printf(fetch, ")"); - shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask); - - shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer); - - vkd3d_string_buffer_release(&gen->string_buffers, fetch); -- glsl_src_cleanup(&lod, &gen->string_buffers); - glsl_src_cleanup(&coord, &gen->string_buffers); - glsl_dst_cleanup(&dst, &gen->string_buffers); - } -@@ -868,8 +899,9 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s - - static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -- bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow; -+ bool shadow_sampler, array, bias, dynamic_offset, gather, grad, lod, lod_zero, offset, shadow; - const struct glsl_resource_type_info *resource_type_info; -+ const struct vkd3d_shader_src_param *resource, *sampler; - unsigned int resource_id, resource_idx, resource_space; - unsigned int sampler_id, sampler_idx, sampler_space; - const struct vkd3d_shader_descriptor_info1 *d; -@@ -881,23 +913,24 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - struct glsl_dst dst; - - bias = ins->opcode == VKD3DSIH_SAMPLE_B; -- gather = ins->opcode == VKD3DSIH_GATHER4; -+ dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO; -+ gather = ins->opcode == 
VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO; - grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD; - lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; - lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ; -+ offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins); - shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ; - -- if (vkd3d_shader_instruction_has_texel_offset(ins)) -- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled texel sample offset."); -+ resource = &ins->src[1 + dynamic_offset]; -+ sampler = &ins->src[2 + dynamic_offset]; - -- if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr -- || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr) -+ if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr -+ || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, - "Descriptor indexing is not supported."); - -- resource_id = ins->src[1].reg.idx[0].offset; -- resource_idx = ins->src[1].reg.idx[1].offset; -+ resource_id = resource->reg.idx[0].offset; -+ resource_idx = resource->reg.idx[1].offset; - if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id))) - { - resource_type = d->resource_type; -@@ -927,8 +960,8 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - array = false; - } - -- sampler_id = ins->src[2].reg.idx[0].offset; -- sampler_idx = ins->src[2].reg.idx[1].offset; -+ sampler_id = sampler->reg.idx[0].offset; -+ sampler_idx = sampler->reg.idx[1].offset; - if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id))) - { - sampler_space = d->register_space; -@@ -958,13 +991,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - sample = 
vkd3d_string_buffer_get(&gen->string_buffers); - - if (gather) -- vkd3d_string_buffer_printf(sample, "textureGather("); -+ vkd3d_string_buffer_printf(sample, "textureGather"); - else if (grad) -- vkd3d_string_buffer_printf(sample, "textureGrad("); -+ vkd3d_string_buffer_printf(sample, "textureGrad"); - else if (lod) -- vkd3d_string_buffer_printf(sample, "textureLod("); -+ vkd3d_string_buffer_printf(sample, "textureLod"); - else -- vkd3d_string_buffer_printf(sample, "texture("); -+ vkd3d_string_buffer_printf(sample, "texture"); -+ vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : ""); - shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space); - vkd3d_string_buffer_printf(sample, ", "); - if (shadow) -@@ -985,18 +1019,32 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk - { - vkd3d_string_buffer_printf(sample, ", 0.0"); - } -- else if (bias || lod) -+ else if (lod) - { - vkd3d_string_buffer_printf(sample, ", "); - shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); - } -- if (gather) -+ if (offset) -+ { -+ vkd3d_string_buffer_printf(sample, ", "); -+ if (dynamic_offset) -+ shader_glsl_print_src(sample, gen, &ins->src[1], -+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type); -+ else -+ shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset); -+ } -+ if (bias) - { -- if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0))) -+ vkd3d_string_buffer_printf(sample, ", "); -+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type); -+ } -+ else if (gather) -+ { -+ if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 0))) - vkd3d_string_buffer_printf(sample, ", %d", component_idx); - } - vkd3d_string_buffer_printf(sample, ")"); -- shader_glsl_print_swizzle(sample, ins->src[1].swizzle, 
ins->dst[0].write_mask); -+ shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask); - - shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer); - -@@ -1268,7 +1316,13 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st - "Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type); - vkd3d_string_buffer_printf(buffer, - "uintBitsToFloat(uvec4(gl_FrontFacing ? 0xffffffffu : 0u, 0u, 0u, 0u))"); -+ break; - -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ if (version->type != VKD3D_SHADER_TYPE_PIXEL) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type); -+ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))"); - break; - - case VKD3D_SHADER_SV_TARGET: -@@ -1390,6 +1444,9 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen) - case VKD3D_SHADER_COMPONENT_UINT: - vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index); - break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index); -+ break; - default: - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled output component type %#x.", e->component_type); -@@ -1499,6 +1556,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - shader_glsl_cast(gen, ins, "uint", "uvec"); - break; - case VKD3DSIH_GATHER4: -+ case VKD3DSIH_GATHER4_PO: - case VKD3DSIH_SAMPLE: - case VKD3DSIH_SAMPLE_B: - case VKD3DSIH_SAMPLE_C: -@@ -1553,6 +1611,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, - shader_glsl_cast(gen, ins, "float", "vec"); - break; - case VKD3DSIH_LD: -+ case VKD3DSIH_LD2DMS: - shader_glsl_ld(gen, ins); - break; - case VKD3DSIH_LD_UAV_TYPED: 
-@@ -1911,6 +1970,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator - struct vkd3d_string_buffer *buffer = gen->buffer; - enum vkd3d_shader_component_type component_type; - const char *sampler_type, *sampler_type_prefix; -+ enum vkd3d_shader_resource_type resource_type; - unsigned int binding_idx; - bool shadow = false; - -@@ -1936,18 +1996,32 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator - return; - } - -- if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type))) -+ resource_type = srv->resource_type; -+ if (srv->sample_count == 1) -+ { -+ /* The OpenGL API distinguishes between multi-sample textures with -+ * sample count 1 and single-sample textures. Direct3D and Vulkan -+ * don't make this distinction at the API level, but Direct3D shaders -+ * are capable of expressing both. We therefore map such multi-sample -+ * textures to their single-sample equivalents here. */ -+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; -+ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY) -+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY; -+ } -+ -+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type))) - { - sampler_type = resource_type_info->type_suffix; - if (shadow && !resource_type_info->shadow) - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED, -- "Comparison samplers are not supported with resource type %#x.", srv->resource_type); -+ "Comparison samplers are not supported with resource type %#x.", resource_type); - } - else - { - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled resource type %#x for combined resource/sampler " -- "for resource %u, space %u and sampler %u, space %u.", srv->resource_type, -+ "for resource %u, space %u and sampler %u, space %u.", resource_type, - crs->resource_index, 
crs->resource_space, crs->sampler_index, crs->sampler_space); - sampler_type = ""; - } -@@ -1972,7 +2046,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator - break; - } - -- if (!shader_glsl_get_combined_sampler_binding(gen, crs, srv->resource_type, &binding_idx)) -+ if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx)) - { - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND, - "No descriptor binding specified for combined resource/sampler " -@@ -2213,6 +2287,9 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator - case VKD3D_SHADER_COMPONENT_UINT: - vkd3d_string_buffer_printf(buffer, "uvec4"); - break; -+ case VKD3D_SHADER_COMPONENT_INT: -+ vkd3d_string_buffer_printf(buffer, "ivec4"); -+ break; - case VKD3D_SHADER_COMPONENT_FLOAT: - vkd3d_string_buffer_printf(buffer, "vec4"); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 1f90a4ba805..96de18dc886 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2774,6 +2774,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - - case HLSL_CLASS_TEXTURE: -+ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ vkd3d_string_buffer_printf(string, "ByteAddressBuffer"); -+ return string; -+ } -+ - if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC) - { - vkd3d_string_buffer_printf(string, "Texture"); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 18effcc5be1..8dace11916a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -74,6 +74,7 @@ ANY (.) 
- BlendState {return KW_BLENDSTATE; } - break {return KW_BREAK; } - Buffer {return KW_BUFFER; } -+ByteAddressBuffer {return KW_BYTEADDRESSBUFFER; } - case {return KW_CASE; } - cbuffer {return KW_CBUFFER; } - centroid {return KW_CENTROID; } -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index dcbba46ede6..60aade732db 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -53,7 +53,7 @@ struct parse_parameter - struct parse_initializer initializer; - }; - --struct parse_colon_attribute -+struct parse_colon_attributes - { - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; -@@ -5175,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; - struct hlsl_ir_node *expr; - -+ if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -+ - if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, - operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) - return false; -@@ -5599,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct - return false; - } - -+static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, -+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD}; -+ struct hlsl_ir_node *load; -+ unsigned int value_dim; -+ -+ if (params->args_count != 1 && params->args_count != 2) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.", -+ 
params->args_count); -+ return false; -+ } -+ -+ if (params->args_count == 2) -+ { -+ hlsl_fixme(ctx, loc, "Tiled resource status argument."); -+ return false; -+ } -+ -+ if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name); -+ return false; -+ } -+ -+ if (!strcmp(name, "Load")) -+ value_dim = 1; -+ else if (!strcmp(name, "Load2")) -+ value_dim = 2; -+ else if (!strcmp(name, "Load3")) -+ value_dim = 3; -+ else -+ value_dim = 4; -+ -+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0], -+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ -+ load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim); -+ load_params.resource = object; -+ -+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) -+ return false; -+ hlsl_block_add_instr(block, load); -+ return true; -+} -+ - static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object, - const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -5608,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, - struct hlsl_ir_node *load; - bool multisampled; - -+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ return add_raw_load_method_call(ctx, block, object, name, params, loc); -+ - if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) - { - hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name); -@@ -6260,7 +6316,10 @@ texture_methods[] = - - { "GetDimensions", add_getdimensions_method_call, "00111111111110" }, - -- { "Load", add_load_method_call, "00111011110110" }, -+ { "Load", add_load_method_call, "00111011110111" }, -+ { "Load2", add_raw_load_method_call, "00000000000001" }, -+ { "Load3", add_raw_load_method_call, "00000000000001" }, -+ { "Load4", 
add_raw_load_method_call, "00000000000001" }, - - { "Sample", add_sample_method_call, "00111111001000" }, - { "SampleBias", add_sample_lod_method_call, "00111111001000" }, -@@ -6490,7 +6549,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct parse_if_body if_body; - enum parse_assign_op assign_op; - struct hlsl_reg_reservation reg_reservation; -- struct parse_colon_attribute colon_attribute; -+ struct parse_colon_attributes colon_attributes; - struct hlsl_semantic semantic; - enum hlsl_buffer_type buffer_type; - enum hlsl_sampler_dim sampler_dim; -@@ -6505,6 +6564,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - %token KW_BLENDSTATE - %token KW_BREAK - %token KW_BUFFER -+%token KW_BYTEADDRESSBUFFER - %token KW_CASE - %token KW_CONSTANTBUFFER - %token KW_CBUFFER -@@ -6687,7 +6747,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - - %type buffer_type - --%type colon_attribute -+%type colon_attributes - - %type field - %type fields_list -@@ -6875,7 +6935,7 @@ effect_group: - } - - buffer_declaration: -- var_modifiers buffer_type any_identifier colon_attribute annotations_opt -+ var_modifiers buffer_type any_identifier colon_attributes annotations_opt - { - if ($4.semantic.name) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); -@@ -7200,7 +7260,7 @@ func_declaration: - - func_prototype_no_attrs: - /* var_modifiers is necessary to avoid shift/reduce conflicts. 
*/ -- var_modifiers type var_identifier '(' parameters ')' colon_attribute -+ var_modifiers type var_identifier '(' parameters ')' colon_attributes - { - uint32_t modifiers = $1; - struct hlsl_ir_var *var; -@@ -7377,28 +7437,39 @@ var_identifier: - VAR_IDENTIFIER - | NEW_IDENTIFIER - --colon_attribute: -+colon_attributes: - %empty - { - $$.semantic = (struct hlsl_semantic){0}; - $$.reg_reservation.reg_type = 0; - $$.reg_reservation.offset_type = 0; - } -- | semantic -+ | colon_attributes semantic - { -- $$.semantic = $1; -- $$.reg_reservation.reg_type = 0; -- $$.reg_reservation.offset_type = 0; -+ hlsl_cleanup_semantic(&$$.semantic); -+ $$.semantic = $2; - } -- | register_reservation -+ | colon_attributes register_reservation - { -- $$.semantic = (struct hlsl_semantic){0}; -- $$.reg_reservation = $1; -+ if ($$.reg_reservation.reg_type) -+ hlsl_fixme(ctx, &@2, "Multiple register() reservations."); -+ -+ $$.reg_reservation.reg_type = $2.reg_type; -+ $$.reg_reservation.reg_index = $2.reg_index; -+ $$.reg_reservation.reg_space = $2.reg_space; - } -- | packoffset_reservation -+ | colon_attributes packoffset_reservation - { -- $$.semantic = (struct hlsl_semantic){0}; -- $$.reg_reservation = $1; -+ if (ctx->cur_buffer == ctx->globals_buffer) -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "The packoffset() reservation is only allowed within 'cbuffer' blocks."); -+ } -+ else -+ { -+ $$.reg_reservation.offset_type = $2.offset_type; -+ $$.reg_reservation.offset_index = $2.offset_index; -+ } - } - - semantic: -@@ -7594,7 +7665,7 @@ parameter: - } - - parameter_decl: -- var_modifiers type_no_void any_identifier arrays colon_attribute -+ var_modifiers type_no_void any_identifier arrays colon_attributes - { - uint32_t modifiers = $1; - struct hlsl_type *type; -@@ -7863,6 +7934,10 @@ type_no_void: - - $$ = hlsl_new_texture_type(ctx, $1, $3, sample_count); - } -+ | KW_BYTEADDRESSBUFFER -+ { -+ $$ = hlsl_new_texture_type(ctx, 
HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0); -+ } - | uav_type '<' resource_format '>' - { - validate_uav_type(ctx, $1, $3, &@3); -@@ -8095,7 +8170,7 @@ variables_def_typed: - } - - variable_decl: -- any_identifier arrays colon_attribute annotations_opt -+ any_identifier arrays colon_attributes annotations_opt - { - $$ = hlsl_alloc(ctx, sizeof(*$$)); - $$->loc = @1; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 213e403dcbd..bea16fd4da6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -6520,6 +6520,21 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog - } - - mask = (1 << var->data_type->dimx) - 1; -+ -+ if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output -+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) -+ { -+ if (var->data_type->dimx > 1) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "PSIZE output must have only 1 component in this shader model."); -+ /* For some reason the writemask has all components set. */ -+ mask = VKD3DSP_WRITEMASK_ALL; -+ } -+ if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3 -+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1) -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, -+ "FOG output must have only 1 component in this shader model."); -+ - use_mask = mask; /* FIXME: retrieve use mask accurately. 
*/ - component_type = VKD3D_SHADER_COMPONENT_FLOAT; - } -@@ -6817,6 +6832,16 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, - } - } - -+static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst, -+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr) -+{ -+ VKD3D_ASSERT(instr->reg.allocated); -+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); -+ dst->reg.idx[0].offset = instr->reg.id; -+ dst->reg.dimension = VSIR_DIMENSION_VEC4; -+ dst->write_mask = instr->reg.writemask; -+} -+ - static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_constant *constant) - { -@@ -6842,6 +6867,25 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, - dst_param->write_mask = instr->reg.writemask; - } - -+static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ struct vkd3d_shader_src_param *src_param; -+ struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_instruction *ins; -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1))) -+ return; -+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -+ -+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); -+ -+ src_param = &ins->src[0]; -+ vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0); -+ src_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+} -+ - /* Translate ops that can be mapped to a single vsir instruction with only one dst register. 
*/ - static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, - struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, -@@ -6866,10 +6910,7 @@ static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, - return; - - dst_param = &ins->dst[0]; -- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); -- dst_param->reg.idx[0].offset = instr->reg.id; -- dst_param->reg.dimension = VSIR_DIMENSION_VEC4; -- dst_param->write_mask = instr->reg.writemask; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); - dst_param->modifiers = dst_mod; - - for (i = 0; i < src_count; ++i) -@@ -7216,6 +7257,8 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, - - if (deref->var->is_output_semantic) - { -+ const char *semantic_name = deref->var->semantic.name; -+ - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -@@ -7225,7 +7268,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, - type = VKD3DSPR_TEMP; - register_index = 0; - } -- else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name, -+ else if (!sm1_register_from_semantic_name(&version, semantic_name, - deref->var->semantic.index, true, &type, ®ister_index)) - { - VKD3D_ASSERT(reg.allocated); -@@ -7234,6 +7277,14 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, - } - else - writemask = (1u << deref->var->data_type->dimx) - 1; -+ -+ if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE") -+ || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3))) -+ { -+ /* These are always 1-component, but for some reason are written -+ * with a writemask containing all components. 
*/ -+ writemask = VKD3DSP_WRITEMASK_ALL; -+ } - } - else - VKD3D_ASSERT(reg.allocated); -@@ -7642,6 +7693,123 @@ static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, - hlsl_replace_node(instr, vsir_instr); - } - -+static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) -+{ -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ const bool output = var->is_output_semantic; -+ enum vkd3d_shader_sysval_semantic semantic; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct vkd3d_shader_instruction *ins; -+ enum vkd3d_shader_register_type type; -+ enum vkd3d_shader_opcode opcode; -+ uint32_t write_mask; -+ unsigned int idx; -+ bool has_idx; -+ -+ sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, -+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); -+ if (semantic == ~0u) -+ semantic = VKD3D_SHADER_SV_NONE; -+ -+ if (var->is_input_semantic) -+ { -+ switch (semantic) -+ { -+ case VKD3D_SHADER_SV_NONE: -+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT; -+ break; -+ -+ case VKD3D_SHADER_SV_INSTANCE_ID: -+ case VKD3D_SHADER_SV_IS_FRONT_FACE: -+ case VKD3D_SHADER_SV_PRIMITIVE_ID: -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ case VKD3D_SHADER_SV_VERTEX_ID: -+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV; -+ break; -+ -+ default: -+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -+ ? 
VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV; -+ break; -+ } -+ } -+ else -+ { -+ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) -+ opcode = VKD3DSIH_DCL_OUTPUT; -+ else -+ opcode = VKD3DSIH_DCL_OUTPUT_SIV; -+ } -+ -+ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx)) -+ { -+ if (has_idx) -+ idx = var->semantic.index; -+ write_mask = (1u << var->data_type->dimx) - 1; -+ } -+ else -+ { -+ if (output) -+ type = VKD3DSPR_OUTPUT; -+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -+ type = VKD3DSPR_PATCHCONST; -+ else -+ type = VKD3DSPR_INPUT; -+ -+ has_idx = true; -+ idx = var->regs[HLSL_REGSET_NUMERIC].id; -+ write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0))) -+ return; -+ -+ if (opcode == VKD3DSIH_DCL_OUTPUT) -+ { -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE -+ || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT); -+ dst_param = &ins->declaration.dst; -+ } -+ else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS) -+ { -+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); -+ dst_param = &ins->declaration.dst; -+ } -+ else -+ { -+ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); -+ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic, -+ var->semantic.index); -+ dst_param = &ins->declaration.register_semantic.reg; -+ } -+ -+ if (has_idx) -+ { -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); -+ dst_param->reg.idx[0].offset = idx; -+ } -+ else -+ { -+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); -+ } -+ -+ if (shader_sm4_is_scalar_register(&dst_param->reg)) -+ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; -+ else -+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; -+ -+ dst_param->write_mask = write_mask; -+ -+ if (var->is_input_semantic && version->type == 
VKD3D_SHADER_TYPE_PIXEL) -+ ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -+ -+ add_last_vsir_instr_to_block(ctx, program, block); -+} -+ - static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, - uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) - { -@@ -7674,76 +7842,754 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, - add_last_vsir_instr_to_block(ctx, program, block); - } - --static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, -- struct vsir_program *program, struct hlsl_ir_expr *expr) -+static bool type_is_float(const struct hlsl_type *type) - { -- switch (expr->op) -- { -- case HLSL_OP1_ABS: -- generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); -- return true; -+ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; -+} - -- default: -- return false; -- } -+static bool type_is_integer(const struct hlsl_type *type) -+{ -+ return type->e.numeric.type == HLSL_TYPE_BOOL -+ || type->e.numeric.type == HLSL_TYPE_INT -+ || type->e.numeric.type == HLSL_TYPE_UINT; - } - --static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program, -+ const struct hlsl_ir_expr *expr, uint32_t bits) - { -- struct hlsl_ir_node *instr, *next; -+ struct hlsl_ir_node *operand = expr->operands[0].node; -+ const struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct hlsl_constant_value value = {0}; -+ struct vkd3d_shader_instruction *ins; - -- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 
2))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask); -+ -+ value.u[0].u = bits; -+ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0); -+} -+ -+static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr) -+{ -+ const struct hlsl_ir_node *arg1 = expr->operands[0].node; -+ const struct hlsl_type *dst_type = expr->node.data_type; -+ const struct hlsl_type *src_type = arg1->data_type; -+ -+ static const union - { -- if (instr->data_type) -- { -- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -+ uint32_t u; -+ float f; -+ } one = { .f = 1.0 }; -+ -+ /* Narrowing casts were already lowered. */ -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -+ -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ switch (src_type->e.numeric.type) - { -- hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -- break; -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float."); -+ return false; -+ -+ default: -+ vkd3d_unreachable(); - } -- } -+ break; - -- switch (instr->type) -- { -- case HLSL_IR_CALL: -- vkd3d_unreachable(); -+ case HLSL_TYPE_INT: -+ switch 
(src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true); -+ return true; - -- case HLSL_IR_CONSTANT: -- /* In SM4 all constants are inlined. */ -- break; -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; - -- case HLSL_IR_EXPR: -- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr))) -- replace_instr_with_last_vsir_instr(ctx, program, instr); -- break; -+ case HLSL_TYPE_BOOL: -+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u); -+ return true; - -- case HLSL_IR_SWIZZLE: -- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -- replace_instr_with_last_vsir_instr(ctx, program, instr); -- break; -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int."); -+ return false; - -- default: -- break; -- } -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_UINT: -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u); -+ return true; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint."); -+ return false; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ break; -+ -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double."); -+ return false; -+ -+ case HLSL_TYPE_BOOL: -+ /* Casts to bool should have already been lowered. 
*/ -+ default: -+ vkd3d_unreachable(); - } - } - --static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) -+static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program, -+ enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx) - { -- struct hlsl_block block = {0}; -- struct hlsl_scope *scope; -- struct hlsl_ir_var *var; -- uint32_t temp_count; -+ struct vkd3d_shader_dst_param *dst_param, *null_param; -+ const struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int i, src_count; - -- compute_liveness(ctx, func); -- mark_indexable_vars(ctx, func); -- temp_count = allocate_temp_registers(ctx, func); -- if (ctx->result) -+ VKD3D_ASSERT(instr->reg.allocated); -+ -+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i) -+ { -+ if (expr->operands[i].node) -+ src_count = i + 1; -+ } -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count))) - return; -- program->temp_count = max(program->temp_count, temp_count); - -- hlsl_block_init(&block); -+ dst_param = &ins->dst[dst_idx]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ null_param = &ins->dst[1 - dst_idx]; -+ vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0); -+ null_param->reg.dimension = VSIR_DIMENSION_NONE; -+ -+ for (i = 0; i < src_count; ++i) -+ vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask); -+} -+ -+static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx, -+ struct vsir_program *program, const struct hlsl_ir_expr *expr) -+{ -+ struct hlsl_ir_node *operand = expr->operands[0].node; -+ const struct hlsl_ir_node *instr = &expr->node; -+ struct vkd3d_shader_dst_param *dst_param; -+ struct hlsl_constant_value value = {0}; -+ struct vkd3d_shader_instruction *ins; -+ -+ 
VKD3D_ASSERT(type_is_float(expr->node.data_type)); -+ -+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DIV, 1, 2))) -+ return; -+ -+ dst_param = &ins->dst[0]; -+ vsir_dst_from_hlsl_node(dst_param, ctx, instr); -+ -+ value.u[0].f = 1.0f; -+ value.u[1].f = 1.0f; -+ value.u[2].f = 1.0f; -+ value.u[3].f = 1.0f; -+ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, -+ VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask); -+ -+ vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask); -+} -+ -+static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, -+ struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name) -+{ -+ const struct hlsl_type *dst_type = expr->node.data_type; -+ const struct hlsl_type *src_type = NULL; -+ -+ VKD3D_ASSERT(expr->node.reg.allocated); -+ if (expr->operands[0].node) -+ src_type = expr->operands[0].node->data_type; -+ -+ switch (expr->op) -+ { -+ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -+ sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr); -+ return true; -+ -+ case HLSL_OP1_ABS: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); -+ return true; -+ -+ case HLSL_OP1_BIT_NOT: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_CAST: -+ return sm4_generate_vsir_instr_expr_cast(ctx, program, expr); -+ -+ case HLSL_OP1_CEIL: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_COS: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1); -+ return true; -+ -+ case HLSL_OP1_DSX: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSX_COARSE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSX_FINE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSY: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSY_COARSE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_DSY_FINE: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_EXP2: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_F16TOF32: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_F32TOF16: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_FLOOR: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_FRACT: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, 
VKD3DSIH_FRC, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_LOG2: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_LOGIC_NOT: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_NEG: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP1_RCP: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ /* SM5 comes with a RCP opcode */ -+ if (hlsl_version_ge(ctx, 5, 0)) -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true); -+ else -+ sm4_generate_vsir_rcp_using_div(ctx, program, expr); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP1_REINTERPRET: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_ROUND: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_RSQ: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_SAT: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 
VKD3DSPDM_SATURATE, true); -+ return true; -+ -+ case HLSL_OP1_SIN: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0); -+ return true; -+ -+ case HLSL_OP1_SQRT: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true); -+ return true; -+ -+ case HLSL_OP1_TRUNC: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_ADD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_BIT_AND: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_BIT_OR: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_BIT_XOR: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_DIV: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", 
dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_DOT: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ switch (expr->operands[0].node->data_type->dimx) -+ { -+ case 4: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); -+ return true; -+ -+ case 3: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); -+ return true; -+ -+ case 2: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false); -+ return true; -+ -+ case 1: -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_EQUAL: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_GEQUAL: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -+ 
debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_LESS: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_LOGIC_AND: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_LOGIC_OR: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP2_LSHIFT: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true); -+ return true; -+ -+ case HLSL_OP3_MAD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MAX: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ 
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MIN: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MOD: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_UINT: -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_MUL: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ /* Using IMUL instead of UMUL because we're taking the low -+ * bits, and the native compiler generates IMUL. 
*/ -+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name); -+ return false; -+ } -+ -+ case HLSL_OP2_NEQUAL: -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ -+ switch (src_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true); -+ return true; -+ -+ case HLSL_TYPE_BOOL: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -+ debug_hlsl_type(ctx, src_type)); -+ return false; -+ } -+ -+ case HLSL_OP2_RSHIFT: -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, -+ dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true); -+ return true; -+ -+ case HLSL_OP3_TERNARY: -+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true); -+ return true; -+ -+ default: -+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -+ return false; -+ } -+} -+ -+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) -+{ -+ struct vkd3d_string_buffer *dst_type_string; -+ struct hlsl_ir_node *instr, *next; -+ struct hlsl_ir_switch_case *c; -+ -+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->data_type) -+ { -+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) -+ { -+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); -+ break; -+ } -+ } -+ -+ switch (instr->type) -+ { -+ case HLSL_IR_CALL: -+ vkd3d_unreachable(); -+ -+ case HLSL_IR_CONSTANT: -+ /* In SM4 all constants are inlined. 
*/ -+ break; -+ -+ case HLSL_IR_EXPR: -+ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type))) -+ break; -+ -+ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer)) -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ -+ hlsl_release_string_buffer(ctx, dst_type_string); -+ break; -+ -+ case HLSL_IR_IF: -+ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program); -+ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program); -+ break; -+ -+ case HLSL_IR_LOOP: -+ sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program); -+ break; -+ -+ case HLSL_IR_SWITCH: -+ LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry) -+ sm4_generate_vsir_block(ctx, &c->body, program); -+ break; -+ -+ case HLSL_IR_SWIZZLE: -+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); -+ replace_instr_with_last_vsir_instr(ctx, program, instr); -+ break; -+ -+ default: -+ break; -+ } -+ } -+} -+ -+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, -+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) -+{ -+ bool is_patch_constant_func = func == ctx->patch_constant_func; -+ struct hlsl_block block = {0}; -+ struct hlsl_scope *scope; -+ struct hlsl_ir_var *var; -+ uint32_t temp_count; -+ -+ compute_liveness(ctx, func); -+ mark_indexable_vars(ctx, func); -+ temp_count = allocate_temp_registers(ctx, func); -+ if (ctx->result) -+ return; -+ program->temp_count = max(program->temp_count, temp_count); -+ -+ hlsl_block_init(&block); -+ -+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if ((var->is_input_semantic && var->last_read) -+ || (var->is_output_semantic && var->first_write)) -+ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc); -+ } - - if (temp_count) - sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, 
&block, &func->loc); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 9b50a308e11..836e0ade32a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -1932,6 +1932,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - const struct shader_signature *signature; - const struct signature_element *e; - -+ write_mask = dst_param->write_mask; -+ - switch (reg->type) - { - case VKD3DSPR_OUTPUT: -@@ -1987,6 +1989,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - signature = normaliser->output_signature; - reg->type = VKD3DSPR_OUTPUT; - dcl_params = normaliser->output_dcl_params; -+ /* Fog and point size are scalar, but fxc/d3dcompiler emits a full -+ * write mask when writing to them. */ -+ if (reg->idx[0].offset > 0) -+ write_mask = VKD3DSP_WRITEMASK_0; - break; - - default: -@@ -1994,7 +2000,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - } - - id_idx = reg->idx_count - 1; -- write_mask = dst_param->write_mask; - if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx)) - vkd3d_unreachable(); - e = &signature->elements[element_idx]; -@@ -6214,6 +6219,14 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr - return VKD3D_OK; - } - -+static bool is_pre_rasterization_shader(enum vkd3d_shader_type type) -+{ -+ return type == VKD3D_SHADER_TYPE_VERTEX -+ || type == VKD3D_SHADER_TYPE_HULL -+ || type == VKD3D_SHADER_TYPE_DOMAIN -+ || type == VKD3D_SHADER_TYPE_GEOMETRY; -+} -+ - static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program, - const struct vkd3d_shader_instruction *ret, size_t *ret_pos) - { -@@ -6244,10 +6257,7 @@ static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *pro - if (program->has_point_size) - return VKD3D_OK; - -- if (program->shader_version.type != 
VKD3D_SHADER_TYPE_VERTEX -- && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY -- && program->shader_version.type != VKD3D_SHADER_TYPE_HULL -- && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) -+ if (!is_pre_rasterization_shader(program->shader_version.type)) - return VKD3D_OK; - - for (unsigned int i = 0; i < program->parameter_count; ++i) -@@ -6298,10 +6308,7 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra - if (!program->has_point_size) - return VKD3D_OK; - -- if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX -- && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY -- && program->shader_version.type != VKD3D_SHADER_TYPE_HULL -- && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN) -+ if (!is_pre_rasterization_shader(program->shader_version.type)) - return VKD3D_OK; - - for (unsigned int i = 0; i < program->parameter_count; ++i) -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index fb7ce063c85..3fa4d68a48a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -8904,15 +8904,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler - uint32_t base_coordinate_id, component_idx; - uint32_t constituents[VKD3D_VEC4_SIZE]; - struct vkd3d_shader_image image; -+ bool storage_buffer_uav = false; - uint32_t indices[2]; - unsigned int i, j; - SpvOp op; - - resource = &src[instruction->src_count - 1]; -- resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); - -- if (resource->reg.type == VKD3DSPR_UAV -- && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource)) -+ if (resource->reg.type == VKD3DSPR_UAV) -+ { -+ resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg); -+ storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource); -+ } -+ -+ if (storage_buffer_uav) - { - texel_type_id 
= vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id); -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index bbd2f761d29..9c41e2c2053 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -1719,7 +1719,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) - {VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID, VKD3D_SM4_SWIZZLE_NONE}, - {VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM4_RT_NULL, VKD3DSPR_NULL, VKD3D_SM4_SWIZZLE_INVALID}, -- {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_VEC4}, -+ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_SCALAR}, - {VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM, VKD3D_SM4_SWIZZLE_VEC4}, - {VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY, VKD3D_SM4_SWIZZLE_VEC4}, -@@ -2235,7 +2235,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui - return true; - } - --static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) -+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg) - { - switch (reg->type) - { -@@ -2995,20 +2995,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - - static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); - --static bool type_is_integer(const struct hlsl_type *type) --{ -- switch (type->e.numeric.type) -- { -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- return true; -- -- default: -- return false; -- } --} -- - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx) - { -@@ -4845,7 
+4831,15 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct - } - else - { -- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; -+ switch (component_type->sampler_dim) -+ { -+ case HLSL_SAMPLER_DIM_RAW_BUFFER: -+ instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW; -+ break; -+ default: -+ instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE; -+ break; -+ } - } - instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); - -@@ -4856,135 +4850,62 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct - } - } - --static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf, -- const struct hlsl_ir_var *var, bool is_patch_constant_func) -+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) - { -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- const bool output = var->is_output_semantic; -- enum vkd3d_shader_sysval_semantic semantic; -- bool has_idx; -- - struct sm4_instruction instr = - { -- .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4, -- .dst_count = 1, -- }; -- -- if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx)) -- { -- if (has_idx) -- { -- instr.dsts[0].reg.idx[0].offset = var->semantic.index; -- instr.dsts[0].reg.idx_count = 1; -- } -- else -- { -- instr.dsts[0].reg.idx_count = 0; -- } -- instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1; -- } -- else -- { -- if (output) -- instr.dsts[0].reg.type = VKD3DSPR_OUTPUT; -- else if (version->type == VKD3D_SHADER_TYPE_DOMAIN) -- instr.dsts[0].reg.type = VKD3DSPR_PATCHCONST; -- else -- instr.dsts[0].reg.type = VKD3DSPR_INPUT; -- -- instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; -- instr.dsts[0].reg.idx_count = 1; -- instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask; -- } -- -- if (shader_sm4_is_scalar_register(&instr.dsts[0].reg)) -- instr.dsts[0].reg.dimension = 
VSIR_DIMENSION_SCALAR; -- -- sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping, -- tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func); -- if (semantic == ~0u) -- semantic = VKD3D_SHADER_SV_NONE; -- -- if (var->is_input_semantic) -- { -- switch (semantic) -- { -- case VKD3D_SHADER_SV_NONE: -- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; -- break; -- -- case VKD3D_SHADER_SV_INSTANCE_ID: -- case VKD3D_SHADER_SV_IS_FRONT_FACE: -- case VKD3D_SHADER_SV_PRIMITIVE_ID: -- case VKD3D_SHADER_SV_SAMPLE_INDEX: -- case VKD3D_SHADER_SV_VERTEX_ID: -- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; -- break; -+ .opcode = VKD3D_SM4_OP_DCL_TEMPS, - -- default: -- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL) -- ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV; -- break; -- } -+ .idx = {count}, -+ .idx_count = 1, -+ }; - -- if (version->type == VKD3D_SHADER_TYPE_PIXEL) -- { -- enum vkd3d_shader_interpolation_mode mode; -+ write_sm4_instruction(tpf, &instr); -+} - -- mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers); -- instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT; -- } -- } -- else -+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) -+{ -+ struct sm4_instruction instr = - { -- if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL) -- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; -- else -- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; -- } -+ .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, - -- if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT) -- { -- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET -- || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT); -- } -- else if (instr.opcode == 
VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS) -- { -- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE); -- } -- else -- { -- VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE); -- instr.idx_count = 1; -- instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index); -- } -+ .idx = {temp->register_idx, temp->register_size, temp->component_count}, -+ .idx_count = 3, -+ }; - - write_sm4_instruction(tpf, &instr); - } - --static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count) -+static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM4_OP_DCL_TEMPS, -+ .opcode = opcode, - -- .idx = {count}, -- .idx_count = 1, -+ .dsts[0] = *dst, -+ .dst_count = 1, -+ -+ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, - }; - - write_sm4_instruction(tpf, &instr); - } - --static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp) -+static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -+ const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags) - { - struct sm4_instruction instr = - { -- .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP, -+ .opcode = opcode, - -- .idx = {temp->register_idx, temp->register_size, temp->component_count}, -- .idx_count = 3, -+ .dsts[0] = semantic->reg, -+ .dst_count = 1, -+ -+ .idx[0] = semantic->sysval_semantic, -+ .idx_count = 1, -+ -+ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT, - }; - - write_sm4_instruction(tpf, &instr); -@@ -5111,125 +5032,6 @@ static void write_sm4_ret(const struct tpf_compiler *tpf) - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node 
*dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask); -- instr.srcs[0].modifiers = src_mod; -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); -- sm4_dst_from_node(&instr.dsts[dst_idx], dst); -- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; -- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -- instr.dsts[1 - dst_idx].reg.idx_count = 0; -- instr.dst_count = 2; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --/* dp# instructions don't map the swizzle. 
*/ --static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf, -- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx, -- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); -- sm4_dst_from_node(&instr.dsts[dst_idx], dst); -- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; -- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -- instr.dsts[1 - dst_idx].reg.idx_count = 0; -- instr.dst_count = 2; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[dst_idx].write_mask); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode, -- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2, -- const struct hlsl_ir_node *src3) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = opcode; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], 
src2, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask); -- instr.src_count = 3; -- -- write_sm4_instruction(tpf, &instr); --} -- - static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst, - const struct hlsl_deref *resource, const struct hlsl_ir_node *coords, - const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset, -@@ -5240,12 +5042,15 @@ static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_no - && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY); - bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS); - const struct vkd3d_shader_version *version = &tpf->program->shader_version; -+ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER; - unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL; - struct sm4_instruction instr; - - memset(&instr, 0, sizeof(instr)); - if (uav) - instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED; -+ else if (raw) -+ instr.opcode = VKD3D_SM5_OP_LD_RAW; - else - instr.opcode = multisampled ? 
VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD; - -@@ -5441,742 +5246,67 @@ static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_ - write_sm4_instruction(tpf, &instr); - } - --static bool type_is_float(const struct hlsl_type *type) -+static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) - { -- return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF; --} -+ struct sm4_instruction instr = -+ { -+ .opcode = VKD3D_SM4_OP_IF, -+ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, -+ .src_count = 1, -+ }; - --static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr, -- const struct hlsl_ir_node *arg, uint32_t mask) --{ -- struct sm4_instruction instr; -+ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_AND; -+ sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -+ write_sm4_instruction(tpf, &instr); - -- sm4_dst_from_node(&instr.dsts[0], &expr->node); -- instr.dst_count = 1; -+ write_sm4_block(tpf, &iff->then_block); - -- sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask); -- instr.srcs[1].reg.type = VKD3DSPR_IMMCONST; -- instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR; -- instr.srcs[1].reg.u.immconst_u32[0] = mask; -- instr.src_count = 2; -+ if (!list_empty(&iff->else_block.instrs)) -+ { -+ instr.opcode = VKD3D_SM4_OP_ELSE; -+ instr.src_count = 0; -+ write_sm4_instruction(tpf, &instr); -+ -+ write_sm4_block(tpf, &iff->else_block); -+ } - -+ instr.opcode = VKD3D_SM4_OP_ENDIF; -+ instr.src_count = 0; - write_sm4_instruction(tpf, &instr); - } - --static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) -+static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) - { -- static const union -- { -- uint32_t u; -- float f; -- } one = { .f = 1.0 }; -- const struct 
hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- const struct hlsl_type *src_type = arg1->data_type; -- -- /* Narrowing casts were already lowered. */ -- VKD3D_ASSERT(src_type->dimx == dst_type->dimx); -+ struct sm4_instruction instr = {0}; - -- switch (dst_type->e.numeric.type) -+ switch (jump->type) - { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(tpf, expr, arg1, one.u); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -+ case HLSL_IR_JUMP_BREAK: -+ instr.opcode = VKD3D_SM4_OP_BREAK; - break; - -- case HLSL_TYPE_INT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(tpf, expr, arg1, 1); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int."); -- break; -- -- default: -- vkd3d_unreachable(); -- } -+ case HLSL_IR_JUMP_CONTINUE: -+ instr.opcode = VKD3D_SM4_OP_CONTINUE; - break; - -- case HLSL_TYPE_UINT: -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_HALF: -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_INT: -- case 
HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_TYPE_BOOL: -- write_sm4_cast_from_bool(tpf, expr, arg1, 1); -- break; -- -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint."); -- break; -+ case HLSL_IR_JUMP_DISCARD_NZ: -+ { -+ instr.opcode = VKD3D_SM4_OP_DISCARD; -+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; - -- default: -- vkd3d_unreachable(); -- } -+ memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -+ instr.src_count = 1; -+ sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); - break; -+ } - -- case HLSL_TYPE_DOUBLE: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double."); -- break; -+ case HLSL_IR_JUMP_RETURN: -+ vkd3d_unreachable(); - -- case HLSL_TYPE_BOOL: -- /* Casts to bool should have already been lowered. */ - default: -- vkd3d_unreachable(); -- } --} -- --static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst) --{ -- struct sm4_instruction instr; -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -- -- sm4_dst_from_node(&instr.dsts[0], dst); -- instr.dst_count = 1; -- -- instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; -- instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; -- instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -- instr.src_count = 1; -- -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr) --{ -- const struct vkd3d_shader_version *version = &tpf->program->shader_version; -- const struct hlsl_ir_node *arg1 = expr->operands[0].node; -- const struct hlsl_ir_node *arg2 = expr->operands[1].node; -- const struct hlsl_ir_node *arg3 = expr->operands[2].node; -- const struct hlsl_type *dst_type = expr->node.data_type; -- struct 
vkd3d_string_buffer *dst_type_string; -- -- VKD3D_ASSERT(expr->node.reg.allocated); -- -- if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) -- return; -- -- switch (expr->op) -- { -- case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -- if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1)) -- write_sm4_rasterizer_sample_count(tpf, &expr->node); -- else -- hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -- "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -- break; -- -- case HLSL_OP1_ABS: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_BIT_NOT: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_CAST: -- write_sm4_cast(tpf, expr); -- break; -- -- case HLSL_OP1_CEIL: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_COS: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); -- break; -- -- case HLSL_OP1_DSX: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSX_COARSE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSX_FINE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSY: -- VKD3D_ASSERT(type_is_float(dst_type)); -- 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSY_COARSE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_DSY_FINE: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_EXP2: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_F16TOF32: -- VKD3D_ASSERT(type_is_float(dst_type)); -- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_F32TOF16: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT); -- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_FLOOR: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_FRACT: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_LOG2: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_LOGIC_NOT: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_NEG: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", 
dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_RCP: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- /* SM5 comes with a RCP opcode */ -- if (vkd3d_shader_ver_ge(version, 5, 0)) -- { -- write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); -- } -- else -- { -- /* For SM4, implement as DIV dst, 1.0, src */ -- struct sm4_instruction instr; -- struct hlsl_constant_value one; -- -- VKD3D_ASSERT(type_is_float(dst_type)); -- -- memset(&instr, 0, sizeof(instr)); -- instr.opcode = VKD3D_SM4_OP_DIV; -- -- sm4_dst_from_node(&instr.dsts[0], &expr->node); -- instr.dst_count = 1; -- -- for (unsigned int i = 0; i < 4; i++) -- one.u[i].f = 1.0f; -- sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); -- sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); -- instr.src_count = 2; -- -- write_sm4_instruction(tpf, &instr); -- } -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP1_REINTERPRET: -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_ROUND: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_RSQ: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_SAT: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV -- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), -- &expr->node, arg1, 0); -- break; -- -- case HLSL_OP1_SIN: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); -- break; -- -- case HLSL_OP1_SQRT: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 
0); -- break; -- -- case HLSL_OP1_TRUNC: -- VKD3D_ASSERT(type_is_float(dst_type)); -- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); -- break; -- -- case HLSL_OP2_ADD: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_BIT_AND: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_BIT_OR: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_BIT_XOR: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_DIV: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_DOT: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- switch (arg1->data_type->dimx) -- { -- case 4: -- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2); -- break; -- -- case 3: -- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2); -- break; -- -- case 2: -- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2); -- break; -- -- case 1: -- default: -- vkd3d_unreachable(); -- } -- break; -- -- 
default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_EQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_GEQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_LESS: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2); 
-- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_LOGIC_AND: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_LOGIC_OR: -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_LSHIFT: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP2_MAX: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_MIN: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_MOD: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_UINT: -- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2); -- break; -- -- default: -- 
hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_MUL: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- /* Using IMUL instead of UMUL because we're taking the low -- * bits, and the native compiler generates IMUL. */ -- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer); -- } -- break; -- -- case HLSL_OP2_NEQUAL: -- { -- const struct hlsl_type *src_type = arg1->data_type; -- -- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- -- switch (src_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2); -- break; -- -- case HLSL_TYPE_BOOL: -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.", -- debug_hlsl_type(tpf->ctx, src_type)); -- break; -- } -- break; -- } -- -- case HLSL_OP2_RSHIFT: -- VKD3D_ASSERT(type_is_integer(dst_type)); -- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -- write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, -- &expr->node, arg1, arg2); -- break; -- -- case HLSL_OP3_TERNARY: -- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3); -- break; -- -- case HLSL_OP3_MAD: -- switch (dst_type->e.numeric.type) -- { -- case HLSL_TYPE_FLOAT: -- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3); -- break; -- -- case HLSL_TYPE_INT: -- case HLSL_TYPE_UINT: -- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3); -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer); -- } -- break; -- -- default: -- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op)); -- } -- -- hlsl_release_string_buffer(tpf->ctx, dst_type_string); --} -- --static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) --{ -- struct sm4_instruction instr = -- { -- .opcode = VKD3D_SM4_OP_IF, -- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, -- .src_count = 1, -- }; -- -- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); -- -- sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); -- write_sm4_instruction(tpf, &instr); -- -- write_sm4_block(tpf, &iff->then_block); -- -- if (!list_empty(&iff->else_block.instrs)) -- { -- instr.opcode = VKD3D_SM4_OP_ELSE; -- instr.src_count = 0; -- write_sm4_instruction(tpf, &instr); -- -- write_sm4_block(tpf, &iff->else_block); -- } -- -- instr.opcode = VKD3D_SM4_OP_ENDIF; -- instr.src_count = 0; -- write_sm4_instruction(tpf, &instr); --} -- --static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump) --{ -- struct sm4_instruction instr = {0}; -- -- switch (jump->type) -- { -- case HLSL_IR_JUMP_BREAK: -- instr.opcode = VKD3D_SM4_OP_BREAK; -- break; -- -- case HLSL_IR_JUMP_CONTINUE: -- instr.opcode = VKD3D_SM4_OP_CONTINUE; -- break; -- -- case HLSL_IR_JUMP_DISCARD_NZ: -- { -- instr.opcode = 
VKD3D_SM4_OP_DISCARD; -- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ; -- -- memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); -- instr.src_count = 1; -- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); -- break; -- } -- -- case HLSL_IR_JUMP_RETURN: -- vkd3d_unreachable(); -- -- default: -- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -- return; -+ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ return; - } - - write_sm4_instruction(tpf, &instr); -@@ -6506,11 +5636,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - } - - instr.opcode = info->opcode; -+ instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; - instr.dst_count = ins->dst_count; - instr.src_count = ins->src_count; - - for (unsigned int i = 0; i < ins->dst_count; ++i) -+ { - instr.dsts[i] = ins->dst[i]; -+ -+ if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE) -+ { -+ /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. 
*/ -+ VKD3D_ASSERT(ins->dst_count == 1); -+ instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE; -+ instr.extra_bits |= VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ } -+ } - for (unsigned int i = 0; i < ins->src_count; ++i) - instr.srcs[i] = ins->src[i]; - -@@ -6529,7 +5670,99 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ - tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); - break; - -+ case VKD3DSIH_DCL_INPUT: -+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_PS: -+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_PS_SGV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_PS_SIV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, &ins->declaration.register_semantic, ins->flags); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_SGV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_DCL_INPUT_SIV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_DCL_OUTPUT: -+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, &ins->declaration.dst, 0); -+ break; -+ -+ case VKD3DSIH_DCL_OUTPUT_SIV: -+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0); -+ break; -+ -+ case VKD3DSIH_ADD: -+ case VKD3DSIH_AND: -+ case VKD3DSIH_DIV: -+ case VKD3DSIH_DP2: -+ case VKD3DSIH_DP3: -+ case VKD3DSIH_DP4: -+ case VKD3DSIH_DSX: -+ case VKD3DSIH_DSX_COARSE: -+ case VKD3DSIH_DSX_FINE: -+ case VKD3DSIH_DSY: -+ case VKD3DSIH_DSY_COARSE: -+ case VKD3DSIH_DSY_FINE: -+ case VKD3DSIH_EQO: -+ case VKD3DSIH_EXP: -+ case VKD3DSIH_F16TOF32: -+ case VKD3DSIH_F32TOF16: -+ case VKD3DSIH_FRC: -+ case 
VKD3DSIH_FTOI: -+ case VKD3DSIH_FTOU: -+ case VKD3DSIH_GEO: -+ case VKD3DSIH_IADD: -+ case VKD3DSIH_IEQ: -+ case VKD3DSIH_IGE: -+ case VKD3DSIH_ILT: -+ case VKD3DSIH_IMAD: -+ case VKD3DSIH_IMAX: -+ case VKD3DSIH_IMIN: -+ case VKD3DSIH_IMUL: -+ case VKD3DSIH_INE: -+ case VKD3DSIH_INEG: -+ case VKD3DSIH_ISHL: -+ case VKD3DSIH_ISHR: -+ case VKD3DSIH_ITOF: -+ case VKD3DSIH_LOG: -+ case VKD3DSIH_LTO: -+ case VKD3DSIH_MAD: -+ case VKD3DSIH_MAX: -+ case VKD3DSIH_MIN: - case VKD3DSIH_MOV: -+ case VKD3DSIH_MOVC: -+ case VKD3DSIH_MUL: -+ case VKD3DSIH_NEU: -+ case VKD3DSIH_NOT: -+ case VKD3DSIH_OR: -+ case VKD3DSIH_RCP: -+ case VKD3DSIH_ROUND_NE: -+ case VKD3DSIH_ROUND_NI: -+ case VKD3DSIH_ROUND_PI: -+ case VKD3DSIH_ROUND_Z: -+ case VKD3DSIH_RSQ: -+ case VKD3DSIH_SAMPLE_INFO: -+ case VKD3DSIH_SINCOS: -+ case VKD3DSIH_SQRT: -+ case VKD3DSIH_UDIV: -+ case VKD3DSIH_UGE: -+ case VKD3DSIH_ULT: -+ case VKD3DSIH_UMAX: -+ case VKD3DSIH_UMIN: -+ case VKD3DSIH_USHR: -+ case VKD3DSIH_UTOF: -+ case VKD3DSIH_XOR: - tpf_simple_instruction(tpf, ins); - break; - -@@ -6568,10 +5801,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b - case HLSL_IR_CONSTANT: - vkd3d_unreachable(); - -- case HLSL_IR_EXPR: -- write_sm4_expr(tpf, hlsl_ir_expr(instr)); -- break; -- - case HLSL_IR_IF: - write_sm4_if(tpf, hlsl_ir_if(instr)); - break; -@@ -6621,16 +5850,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b - - static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func) - { -- struct hlsl_ctx *ctx = tpf->ctx; -- const struct hlsl_ir_var *var; -- -- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry) -- { -- if ((var->is_input_semantic && var->last_read) -- || (var->is_output_semantic && var->first_write)) -- tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func); -- } -- - if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE) - 
tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size); - -@@ -6648,6 +5867,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec - const struct hlsl_buffer *cbuffer; - struct hlsl_ctx *ctx = tpf->ctx; - size_t token_count_position; -+ uint32_t global_flags = 0; - - static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] = - { -@@ -6669,6 +5889,27 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec - put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type])); - token_count_position = put_u32(&buffer, 0); - -+ if (version->major == 4) -+ { -+ for (i = 0; i < extern_resources_count; ++i) -+ { -+ const struct extern_resource *resource = &extern_resources[i]; -+ const struct hlsl_type *type = resource->component_type; -+ -+ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER) -+ { -+ global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS; -+ break; -+ } -+ } -+ } -+ -+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -+ global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL; -+ -+ if (global_flags) -+ write_sm4_dcl_global_flags(tpf, global_flags); -+ - if (version->type == VKD3D_SHADER_TYPE_HULL) - { - tpf_write_hs_decls(tpf); -@@ -6703,9 +5944,6 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec - write_sm4_dcl_textures(tpf, resource, true); - } - -- if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0)) -- write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL); -- - if (version->type == VKD3D_SHADER_TYPE_HULL) - tpf_write_hs_control_point_phase(tpf); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 1b6c37343d1..db18e6d12bc 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h 
-@@ -1620,6 +1620,7 @@ bool sm1_usage_from_semantic_name(const char *semantic_name, - uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); - bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version, - const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx); -+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg); - bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic, - const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain, - const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func); -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index ed4cc370639..a55a97f6f2f 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -4804,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; - const struct vkd3d_vk_device_procs *vk_procs; - VkBuffer buffers[ARRAY_SIZE(list->strides)]; -+ struct d3d12_device *device = list->device; -+ unsigned int i, stride, max_view_count; - struct d3d12_resource *resource; - bool invalidate = false; -- unsigned int i, stride; - - TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views); - -- vk_procs = &list->device->vk_procs; -- null_resources = &list->device->null_resources; -- gpu_va_allocator = &list->device->gpu_va_allocator; -+ vk_procs = &device->vk_procs; -+ null_resources = &device->null_resources; -+ gpu_va_allocator = &device->gpu_va_allocator; - - if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides))) - { -@@ -4820,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - return; - } - -+ max_view_count = 
device->vk_info.device_limits.maxVertexInputBindings; -+ if (start_slot < max_view_count) -+ max_view_count -= start_slot; -+ else -+ max_view_count = 0; -+ -+ /* Although simply skipping unsupported binding slots isn't especially -+ * likely to work well in the general case, applications sometimes -+ * explicitly set all 32 vertex buffer bindings slots supported by -+ * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is -+ * an example of such an application. */ -+ if (view_count > max_view_count) -+ { -+ for (i = max_view_count; i < view_count; ++i) -+ { -+ if (views && views[i].BufferLocation) -+ WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i); -+ } -+ view_count = max_view_count; -+ } -+ - for (i = 0; i < view_count; ++i) - { - if (views && views[i].BufferLocation) --- -2.45.2 - diff --git a/patches/vkd3d-latest/0008-Updated-vkd3d-to-c010fb63a1290721271046d535d7f429a24.patch b/patches/vkd3d-latest/0008-Updated-vkd3d-to-c010fb63a1290721271046d535d7f429a24.patch deleted file mode 100644 index 59ab3962..00000000 --- a/patches/vkd3d-latest/0008-Updated-vkd3d-to-c010fb63a1290721271046d535d7f429a24.patch +++ /dev/null @@ -1,1090 +0,0 @@ -From 4915beabd9d8c038b82016719edaa116d5276ae5 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 14 Nov 2024 06:29:22 +1100 -Subject: [PATCH] Updated vkd3d to c010fb63a1290721271046d535d7f429a24f255e. 
- ---- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 42 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 647 +++++++++++------- - libs/vkd3d/libs/vkd3d-shader/ir.c | 30 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 37 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 2 + - 5 files changed, 499 insertions(+), 259 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 3235a278769..7099bcc9ce2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -430,6 +430,7 @@ enum dx_intrinsic_opcode - DX_DERIV_COARSEY = 84, - DX_DERIV_FINEX = 85, - DX_DERIV_FINEY = 86, -+ DX_SAMPLE_INDEX = 90, - DX_COVERAGE = 91, - DX_THREAD_ID = 93, - DX_GROUP_ID = 94, -@@ -3827,6 +3828,11 @@ static enum vkd3d_shader_register_type register_type_from_dxil_semantic_kind( - { - switch (sysval_semantic) - { -+ /* VSIR does not use an I/O register for SV_SampleIndex, but its -+ * signature element has a register index of UINT_MAX and it is -+ * convenient to return a valid register type here to handle it. 
*/ -+ case VKD3D_SHADER_SV_SAMPLE_INDEX: -+ return VKD3DSPR_NULL; - case VKD3D_SHADER_SV_COVERAGE: - return VKD3DSPR_COVERAGE; - case VKD3D_SHADER_SV_DEPTH: -@@ -3844,6 +3850,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - bool is_input, enum vkd3d_shader_register_type reg_type, struct vkd3d_shader_dst_param *params) - { - enum vkd3d_shader_type shader_type = sm6->p.program->shader_version.type; -+ enum vkd3d_shader_register_type io_reg_type; - bool is_patch_constant, is_control_point; - struct vkd3d_shader_dst_param *param; - const struct signature_element *e; -@@ -3876,9 +3883,10 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - - param = ¶ms[i]; - -- if (e->register_index == UINT_MAX) -+ if (e->register_index == UINT_MAX -+ && (io_reg_type = register_type_from_dxil_semantic_kind(e->sysval_semantic)) != VKD3DSPR_NULL) - { -- dst_param_io_init(param, e, register_type_from_dxil_semantic_kind(e->sysval_semantic)); -+ dst_param_io_init(param, e, io_reg_type); - continue; - } - -@@ -5795,6 +5803,34 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ - instruction_dst_param_init_ssa_vector(ins, component_count, sm6); - } - -+static void sm6_parser_emit_dx_sample_index(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ const struct shader_signature *signature = &sm6->p.program->input_signature; -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ unsigned int element_idx; -+ -+ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); -+ -+ /* SV_SampleIndex is identified in VSIR by its signature element index, -+ * but the index is not supplied as a parameter to the DXIL intrinsic. 
*/ -+ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_SAMPLE_INDEX, 0, &element_idx)) -+ { -+ WARN("Sample index is not in the signature.\n"); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_SIGNATURE, -+ "Sample index signature element for a sample index operation is missing."); -+ return; -+ } -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param->reg = sm6->input_params[element_idx].reg; -+ src_param_init(src_param); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -6300,6 +6336,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_SAMPLE_C ] = {"o", "HHffffiiiff", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, -+ [DX_SAMPLE_INDEX ] = {"i", "", sm6_parser_emit_dx_sample_index}, - [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, - [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, - [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, -@@ -8513,6 +8550,7 @@ static const enum vkd3d_shader_sysval_semantic sysval_semantic_table[] = - [SEMANTIC_KIND_CLIPDISTANCE] = VKD3D_SHADER_SV_CLIP_DISTANCE, - [SEMANTIC_KIND_CULLDISTANCE] = VKD3D_SHADER_SV_CULL_DISTANCE, - [SEMANTIC_KIND_PRIMITIVEID] = VKD3D_SHADER_SV_PRIMITIVE_ID, -+ [SEMANTIC_KIND_SAMPLEINDEX] = VKD3D_SHADER_SV_SAMPLE_INDEX, - [SEMANTIC_KIND_ISFRONTFACE] = VKD3D_SHADER_SV_IS_FRONT_FACE, - [SEMANTIC_KIND_COVERAGE] = VKD3D_SHADER_SV_COVERAGE, - [SEMANTIC_KIND_TARGET] = VKD3D_SHADER_SV_TARGET, -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 9b1ef3bb2e0..ac69bbf950e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -500,6 +500,9 @@ enum fx_4_type_constants - - /* Object types */ - FX_4_OBJECT_TYPE_STRING = 0x1, -+ FX_4_OBJECT_TYPE_BLEND_STATE = 0x2, -+ FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE = 0x3, -+ FX_4_OBJECT_TYPE_RASTERIZER_STATE = 0x4, - FX_4_OBJECT_TYPE_PIXEL_SHADER = 0x5, - FX_4_OBJECT_TYPE_VERTEX_SHADER = 0x6, - FX_4_OBJECT_TYPE_GEOMETRY_SHADER = 0x7, -@@ -516,6 +519,7 @@ enum fx_4_type_constants - FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11, - FX_4_OBJECT_TYPE_RTV = 0x13, - FX_4_OBJECT_TYPE_DSV = 0x14, -+ FX_4_OBJECT_TYPE_SAMPLER_STATE = 0x15, - FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17, - - FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b, -@@ -540,6 +544,12 @@ enum fx_4_type_constants - FX_4_TYPE_CLASS_NUMERIC = 1, - FX_4_TYPE_CLASS_OBJECT = 2, - FX_4_TYPE_CLASS_STRUCT = 3, -+ -+ /* Assignment types */ -+ FX_4_ASSIGNMENT_CONSTANT = 0x1, -+ FX_4_ASSIGNMENT_VARIABLE = 0x2, -+ FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX = 0x3, -+ FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX = 0x4, - }; - - static const uint32_t fx_4_numeric_base_types[] = -@@ -816,7 +826,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - } - else if (element_type->class == HLSL_CLASS_SAMPLER) - { -- put_u32_unaligned(buffer, 21); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_SAMPLER_STATE); - } - else if (element_type->class == HLSL_CLASS_UAV) - { -@@ -852,15 +862,15 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - } - else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) - { -- put_u32_unaligned(buffer, 4); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RASTERIZER_STATE); - } - else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) - { -- put_u32_unaligned(buffer, 3); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE); - } - else if (element_type->class == HLSL_CLASS_BLEND_STATE) - { -- put_u32_unaligned(buffer, 2); -+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_BLEND_STATE); - } - else if 
(element_type->class == HLSL_CLASS_STRING) - { -@@ -1649,7 +1659,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - c = hlsl_ir_constant(value); - - value_offset = write_fx_4_state_numeric_value(c, fx); -- assignment_type = 1; -+ assignment_type = FX_4_ASSIGNMENT_CONSTANT; - break; - } - case HLSL_IR_LOAD: -@@ -1660,7 +1670,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); - - value_offset = write_fx_4_string(load->src.var->name, fx); -- assignment_type = 2; -+ assignment_type = FX_4_ASSIGNMENT_VARIABLE; - break; - } - case HLSL_IR_INDEX: -@@ -1687,7 +1697,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - c = hlsl_ir_constant(idx); - value_offset = put_u32(unstructured, value_offset); - put_u32(unstructured, c->value.u[0].u); -- assignment_type = 3; -+ assignment_type = FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX; - - if (c->value.u[0].u >= type->e.array.elements_count) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -@@ -1708,7 +1718,7 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - - value_offset = put_u32(unstructured, value_offset); - put_u32(unstructured, offset); -- assignment_type = 4; -+ assignment_type = FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX; - break; - } - } -@@ -1824,6 +1834,7 @@ enum state_property_component_type - FX_BLEND, - FX_VERTEXSHADER, - FX_PIXELSHADER, -+ FX_COMPONENT_TYPE_COUNT, - }; - - static inline bool is_object_fx_type(enum state_property_component_type type) -@@ -1894,230 +1905,227 @@ static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_com - } - } - --static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -- struct fx_write_context *fx) --{ -- static const struct rhs_named_value filter_values[] = -- { -- { 
"MIN_MAG_MIP_POINT", 0x00 }, -- { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, -- { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, -- { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, -- { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, -- { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, -- { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, -- { "MIN_MAG_MIP_LINEAR", 0x15 }, -- { "ANISOTROPIC", 0x55 }, -- { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, -- { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, -- { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, -- { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, -- { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, -- { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, -- { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, -- { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, -- { "COMPARISON_ANISOTROPIC", 0xd5 }, -- { NULL }, -- }; -- -- static const struct rhs_named_value address_values[] = -- { -- { "WRAP", 1 }, -- { "MIRROR", 2 }, -- { "CLAMP", 3 }, -- { "BORDER", 4 }, -- { "MIRROR_ONCE", 5 }, -- { NULL }, -- }; -+static const struct rhs_named_value filter_values[] = -+{ -+ { "MIN_MAG_MIP_POINT", 0x00 }, -+ { "MIN_MAG_POINT_MIP_LINEAR", 0x01 }, -+ { "MIN_POINT_MAG_LINEAR_MIP_POINT", 0x04 }, -+ { "MIN_POINT_MAG_MIP_LINEAR", 0x05 }, -+ { "MIN_LINEAR_MAG_MIP_POINT", 0x10 }, -+ { "MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x11 }, -+ { "MIN_MAG_LINEAR_MIP_POINT", 0x14 }, -+ { "MIN_MAG_MIP_LINEAR", 0x15 }, -+ { "ANISOTROPIC", 0x55 }, -+ { "COMPARISON_MIN_MAG_MIP_POINT", 0x80 }, -+ { "COMPARISON_MIN_MAG_POINT_MIP_LINEAR", 0x81 }, -+ { "COMPARISON_MIN_POINT_MAG_LINEAR_MIP_POINT", 0x84 }, -+ { "COMPARISON_MIN_POINT_MAG_MIP_LINEAR", 0x85 }, -+ { "COMPARISON_MIN_LINEAR_MAG_MIP_POINT", 0x90 }, -+ { "COMPARISON_MIN_LINEAR_MAG_POINT_MIP_LINEAR", 0x91 }, -+ { "COMPARISON_MIN_MAG_LINEAR_MIP_POINT", 0x94 }, -+ { "COMPARISON_MIN_MAG_MIP_LINEAR", 0x95 }, -+ { "COMPARISON_ANISOTROPIC", 0xd5 }, -+ { NULL }, -+}; - -- static const struct rhs_named_value compare_func_values[] = -- { -- { "NEVER", 1 }, -- { 
"LESS", 2 }, -- { "EQUAL", 3 }, -- { "LESS_EQUAL", 4 }, -- { "GREATER", 5 }, -- { "NOT_EQUAL", 6 }, -- { "GREATER_EQUAL", 7 }, -- { "ALWAYS", 8 }, -- { NULL } -- }; -+static const struct rhs_named_value address_values[] = -+{ -+ { "WRAP", 1 }, -+ { "MIRROR", 2 }, -+ { "CLAMP", 3 }, -+ { "BORDER", 4 }, -+ { "MIRROR_ONCE", 5 }, -+ { NULL }, -+}; - -- static const struct rhs_named_value depth_write_mask_values[] = -- { -- { "ZERO", 0 }, -- { "ALL", 1 }, -- { NULL } -- }; -+static const struct rhs_named_value compare_func_values[] = -+{ -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+}; - -- static const struct rhs_named_value comparison_values[] = -- { -- { "NEVER", 1 }, -- { "LESS", 2 }, -- { "EQUAL", 3 }, -- { "LESS_EQUAL", 4 }, -- { "GREATER", 5 }, -- { "NOT_EQUAL", 6 }, -- { "GREATER_EQUAL", 7 }, -- { "ALWAYS", 8 }, -- { NULL } -- }; -+static const struct rhs_named_value depth_write_mask_values[] = -+{ -+ { "ZERO", 0 }, -+ { "ALL", 1 }, -+ { NULL } -+}; - -- static const struct rhs_named_value stencil_op_values[] = -- { -- { "KEEP", 1 }, -- { "ZERO", 2 }, -- { "REPLACE", 3 }, -- { "INCR_SAT", 4 }, -- { "DECR_SAT", 5 }, -- { "INVERT", 6 }, -- { "INCR", 7 }, -- { "DECR", 8 }, -- { NULL } -- }; -+static const struct rhs_named_value comparison_values[] = -+{ -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+}; - -- static const struct rhs_named_value fill_values[] = -- { -- { "WIREFRAME", 2 }, -- { "SOLID", 3 }, -- { NULL } -- }; -+static const struct rhs_named_value stencil_op_values[] = -+{ -+ { "KEEP", 1 }, -+ { "ZERO", 2 }, -+ { "REPLACE", 3 }, -+ { "INCR_SAT", 4 }, -+ { "DECR_SAT", 5 }, -+ { "INVERT", 6 }, -+ { "INCR", 7 }, -+ { "DECR", 8 }, -+ { NULL } -+}; - -- static const struct 
rhs_named_value cull_values[] = -- { -- { "NONE", 1 }, -- { "FRONT", 2 }, -- { "BACK", 3 }, -- { NULL } -- }; -+static const struct rhs_named_value fill_values[] = -+{ -+ { "WIREFRAME", 2 }, -+ { "SOLID", 3 }, -+ { NULL } -+}; - -- static const struct rhs_named_value blend_values[] = -- { -- { "ZERO", 1 }, -- { "ONE", 2 }, -- { "SRC_COLOR", 3 }, -- { "INV_SRC_COLOR", 4 }, -- { "SRC_ALPHA", 5 }, -- { "INV_SRC_ALPHA", 6 }, -- { "DEST_ALPHA", 7 }, -- { "INV_DEST_ALPHA", 8 }, -- { "DEST_COLOR", 9 }, -- { "INV_DEST_COLOR", 10 }, -- { "SRC_ALPHA_SAT", 11 }, -- { "BLEND_FACTOR", 14 }, -- { "INV_BLEND_FACTOR", 15 }, -- { "SRC1_COLOR", 16 }, -- { "INV_SRC1_COLOR", 17 }, -- { "SRC1_ALPHA", 18 }, -- { "INV_SRC1_ALPHA", 19 }, -- { NULL } -- }; -+static const struct rhs_named_value cull_values[] = -+{ -+ { "NONE", 1 }, -+ { "FRONT", 2 }, -+ { "BACK", 3 }, -+ { NULL } -+}; - -- static const struct rhs_named_value blendop_values[] = -- { -- { "ADD", 1 }, -- { "SUBTRACT", 2 }, -- { "REV_SUBTRACT", 3 }, -- { "MIN", 4 }, -- { "MAX", 5 }, -- { NULL } -- }; -+static const struct rhs_named_value blend_values[] = -+{ -+ { "ZERO", 1 }, -+ { "ONE", 2 }, -+ { "SRC_COLOR", 3 }, -+ { "INV_SRC_COLOR", 4 }, -+ { "SRC_ALPHA", 5 }, -+ { "INV_SRC_ALPHA", 6 }, -+ { "DEST_ALPHA", 7 }, -+ { "INV_DEST_ALPHA", 8 }, -+ { "DEST_COLOR", 9 }, -+ { "INV_DEST_COLOR", 10 }, -+ { "SRC_ALPHA_SAT", 11 }, -+ { "BLEND_FACTOR", 14 }, -+ { "INV_BLEND_FACTOR", 15 }, -+ { "SRC1_COLOR", 16 }, -+ { "INV_SRC1_COLOR", 17 }, -+ { "SRC1_ALPHA", 18 }, -+ { "INV_SRC1_ALPHA", 19 }, -+ { NULL } -+}; - -- static const struct rhs_named_value bool_values[] = -- { -- { "FALSE", 0 }, -- { "TRUE", 1 }, -- { NULL } -- }; -+static const struct rhs_named_value blendop_values[] = -+{ -+ { "ADD", 1 }, -+ { "SUBTRACT", 2 }, -+ { "REV_SUBTRACT", 3 }, -+ { "MIN", 4 }, -+ { "MAX", 5 }, -+ { NULL } -+}; - -- static const struct rhs_named_value null_values[] = -- { -- { "NULL", 0 }, -- { NULL } -- }; -+static const struct rhs_named_value 
bool_values[] = -+{ -+ { "FALSE", 0 }, -+ { "TRUE", 1 }, -+ { NULL } -+}; - -- static const struct state -- { -- const char *name; -- enum hlsl_type_class container; -- enum hlsl_type_class class; -- enum state_property_component_type type; -- unsigned int dimx; -- unsigned int array_size; -- uint32_t id; -- const struct rhs_named_value *values; -- } -- states[] = -- { -- { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, -- { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, -- { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, -- { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, -- { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, -- -- { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, -- { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, -- { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, -- { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, -- { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, -- -- { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, -- { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, -- { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, -- { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, -- { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, -- { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, -- { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, -- { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 
1, 19, bool_values }, -- { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, -- { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, -- -- { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, -- { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, -- { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, -- { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, -- { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, -- { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, -- { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, -- { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, -- { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, -- { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, -- { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, -- { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, -- { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, -- { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, -- -- { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, -- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, 
FX_UINT, 1, 1, 46, address_values }, -- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, -- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, -- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, -- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, -- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, -- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, -- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, -- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, -- { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, -- -- { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, -- { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, -- { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, -- }; -+static const struct rhs_named_value null_values[] = -+{ -+ { "NULL", 0 }, -+ { NULL } -+}; - -- static const struct state fx_4_blend_states[] = -- { -- { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, -- { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -- { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, -- { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, -- { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, -- { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, -- { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, -- { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 
1, 43, blendop_values }, -- { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, -- }; -+static const struct fx_4_state -+{ -+ const char *name; -+ enum hlsl_type_class container; -+ enum hlsl_type_class class; -+ enum state_property_component_type type; -+ unsigned int dimx; -+ unsigned int array_size; -+ int id; -+ const struct rhs_named_value *values; -+} -+fx_4_states[] = -+{ -+ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, -+ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, -+ { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, -+ { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, -+ { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, -+ -+ { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, -+ { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, -+ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, -+ { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, -+ { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, -+ -+ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, -+ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, -+ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, -+ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, -+ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, -+ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, -+ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, -+ { "ScissorEnable", 
HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, -+ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, -+ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, -+ -+ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, -+ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, -+ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, -+ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, -+ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, -+ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, -+ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, -+ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, -+ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, -+ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, -+ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, -+ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, -+ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, -+ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, -+ -+ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 
36, bool_values }, -+ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -+ { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, -+ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, -+ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, -+ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, -+ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, -+ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, -+ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, -+ -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, -+ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, -+ -+ { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, -+ { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, -+ { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, 
FX_COMPUTESHADER, 1, 1, 58 }, -+}; - -- static const struct state fx_5_blend_states[] = -+static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, -+ struct fx_write_context *fx) -+{ -+ static const struct fx_4_state fx_5_blend_states[] = - { - { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, - { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -@@ -2132,36 +2140,28 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - - struct state_table - { -- const struct state *ptr; -+ const struct fx_4_state *ptr; - unsigned int count; - } table; - - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - struct replace_state_context replace_context; -+ const struct fx_4_state *state = NULL; - struct hlsl_type *state_type = NULL; - struct hlsl_ir_node *node, *cast; -- const struct state *state = NULL; - struct hlsl_ctx *ctx = fx->ctx; - enum hlsl_base_type base_type; - unsigned int i; - -- if (type->class == HLSL_CLASS_BLEND_STATE) -+ if (type->class == HLSL_CLASS_BLEND_STATE && ctx->profile->major_version == 5) - { -- if (ctx->profile->major_version == 4) -- { -- table.ptr = fx_4_blend_states; -- table.count = ARRAY_SIZE(fx_4_blend_states); -- } -- else -- { -- table.ptr = fx_5_blend_states; -- table.count = ARRAY_SIZE(fx_5_blend_states); -- } -+ table.ptr = fx_5_blend_states; -+ table.count = ARRAY_SIZE(fx_5_blend_states); - } - else - { -- table.ptr = states; -- table.count = ARRAY_SIZE(states); -+ table.ptr = fx_4_states; -+ table.count = ARRAY_SIZE(fx_4_states); - } - - for (i = 0; i < table.count; ++i) -@@ -3071,6 +3071,9 @@ static void fx_parse_fx_4_annotations(struct fx_parser *parser) - const char *name, *type_name; - uint32_t count, i, value; - -+ if (parser->failed) -+ return; -+ - count = fx_parser_read_u32(parser); - - if (!count) -@@ -3329,6 +3332,10 @@ 
static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) - switch (type->typeinfo) - { - case FX_4_OBJECT_TYPE_STRING: -+ case FX_4_OBJECT_TYPE_BLEND_STATE: -+ case FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE: -+ case FX_4_OBJECT_TYPE_RASTERIZER_STATE: -+ case FX_4_OBJECT_TYPE_SAMPLER_STATE: - case FX_4_OBJECT_TYPE_PIXEL_SHADER: - case FX_4_OBJECT_TYPE_VERTEX_SHADER: - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -@@ -3343,9 +3350,160 @@ static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type) - } - } - -+static int fx_4_state_id_compare(const void *a, const void *b) -+{ -+ const struct fx_4_state *state = b; -+ int id = *(int *)a; -+ -+ return id - state->id; -+} -+ -+static void fx_4_parse_state_object_initializer(struct fx_parser *parser, uint32_t count, -+ enum hlsl_type_class type_class) -+{ -+ struct fx_4_assignment -+ { -+ uint32_t id; -+ uint32_t lhs_index; -+ uint32_t type; -+ uint32_t value; -+ } entry; -+ struct -+ { -+ uint32_t name; -+ uint32_t index; -+ } index; -+ struct -+ { -+ uint32_t type; -+ union -+ { -+ uint32_t u; -+ float f; -+ }; -+ } value; -+ static const char *value_types[FX_COMPONENT_TYPE_COUNT] = -+ { -+ [FX_BOOL] = "bool", -+ [FX_FLOAT] = "float", -+ [FX_UINT] = "uint", -+ [FX_UINT8] = "byte", -+ }; -+ const struct rhs_named_value *named_value; -+ uint32_t i, j, comp_count; -+ struct fx_4_state *state; -+ -+ for (i = 0; i < count; ++i) -+ { -+ fx_parser_read_u32s(parser, &entry, sizeof(entry)); -+ -+ if (!(state = bsearch(&entry.id, fx_4_states, ARRAY_SIZE(fx_4_states), -+ sizeof(*fx_4_states), fx_4_state_id_compare))) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, "Unrecognized state id %#x.\n", entry.id); -+ break; -+ } -+ -+ if (state->container != type_class) -+ { -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_INVALID_DATA, -+ "State '%s' does not belong to object type class %#x.", state->name, type_class); -+ break; -+ } -+ -+ parse_fx_print_indent(parser); -+ 
vkd3d_string_buffer_printf(&parser->buffer, "%s", state->name); -+ if (state->array_size > 1) -+ vkd3d_string_buffer_printf(&parser->buffer, "[%u]", entry.lhs_index); -+ vkd3d_string_buffer_printf(&parser->buffer, " = "); -+ -+ switch (entry.type) -+ { -+ case FX_4_ASSIGNMENT_CONSTANT: -+ -+ if (value_types[state->type]) -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", value_types[state->type]); -+ if (state->dimx > 1) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", state->dimx); -+ vkd3d_string_buffer_printf(&parser->buffer, "("); -+ -+ fx_parser_read_unstructured(parser, &comp_count, entry.value, sizeof(uint32_t)); -+ -+ named_value = NULL; -+ if (comp_count == 1 && state->values && (state->type == FX_UINT || state->type == FX_BOOL)) -+ { -+ const struct rhs_named_value *ptr = state->values; -+ -+ fx_parser_read_unstructured(parser, &value, entry.value + 4, sizeof(value)); -+ -+ while (ptr->name) -+ { -+ if (value.u == ptr->value) -+ { -+ named_value = ptr; -+ break; -+ } -+ ++ptr; -+ } -+ } -+ -+ if (named_value) -+ { -+ vkd3d_string_buffer_printf(&parser->buffer, "%s /* %u */", named_value->name, named_value->value); -+ } -+ else -+ { -+ uint32_t offset = entry.value + 4; -+ -+ for (j = 0; j < comp_count; ++j, offset += sizeof(value)) -+ { -+ fx_parser_read_unstructured(parser, &value, offset, sizeof(value)); -+ -+ if (state->type == FX_UINT8) -+ vkd3d_string_buffer_printf(&parser->buffer, "0x%.2x", value.u); -+ else if (state->type == FX_UINT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%u", value.u); -+ else if (state->type == FX_FLOAT) -+ vkd3d_string_buffer_printf(&parser->buffer, "%g", value.f); -+ -+ if (comp_count > 1 && j < comp_count - 1) -+ vkd3d_string_buffer_printf(&parser->buffer, ", "); -+ } -+ } -+ -+ vkd3d_string_buffer_printf(&parser->buffer, ")"); -+ -+ break; -+ case FX_4_ASSIGNMENT_VARIABLE: -+ vkd3d_string_buffer_printf(&parser->buffer, "%s", fx_4_get_string(parser, entry.value)); -+ break; -+ case 
FX_4_ASSIGNMENT_ARRAY_CONSTANT_INDEX: -+ fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index)); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s[%u]", fx_4_get_string(parser, index.name), index.index); -+ break; -+ case FX_4_ASSIGNMENT_ARRAY_VARIABLE_INDEX: -+ fx_parser_read_unstructured(parser, &index, entry.value, sizeof(index)); -+ vkd3d_string_buffer_printf(&parser->buffer, "%s[%s]", fx_4_get_string(parser, index.name), -+ fx_4_get_string(parser, index.index)); -+ break; -+ default: -+ fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -+ "Unsupported assignment type %u.\n", entry.type); -+ } -+ vkd3d_string_buffer_printf(&parser->buffer, ";\n"); -+ } -+} -+ - static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct fx_4_binary_type *type) - { -- unsigned int i, element_count; -+ static const enum hlsl_type_class type_classes[] = -+ { -+ [FX_4_OBJECT_TYPE_BLEND_STATE] = HLSL_CLASS_BLEND_STATE, -+ [FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE] = HLSL_CLASS_DEPTH_STENCIL_STATE, -+ [FX_4_OBJECT_TYPE_RASTERIZER_STATE] = HLSL_CLASS_RASTERIZER_STATE, -+ [FX_4_OBJECT_TYPE_SAMPLER_STATE] = HLSL_CLASS_SAMPLER, -+ }; -+ unsigned int i, element_count, count; - uint32_t value; - - if (!fx_4_object_has_initializer(type)) -@@ -3362,6 +3520,16 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct - value = fx_parser_read_u32(parser); - fx_4_parse_string_initializer(parser, value); - break; -+ case FX_4_OBJECT_TYPE_BLEND_STATE: -+ case FX_4_OBJECT_TYPE_DEPTH_STENCIL_STATE: -+ case FX_4_OBJECT_TYPE_RASTERIZER_STATE: -+ case FX_4_OBJECT_TYPE_SAMPLER_STATE: -+ count = fx_parser_read_u32(parser); -+ -+ parse_fx_start_indent(parser); -+ fx_4_parse_state_object_initializer(parser, count, type_classes[type->typeinfo]); -+ parse_fx_end_indent(parser); -+ break; - case FX_4_OBJECT_TYPE_PIXEL_SHADER: - case FX_4_OBJECT_TYPE_VERTEX_SHADER: - case FX_4_OBJECT_TYPE_GEOMETRY_SHADER: -@@ -3402,6 +3570,9 @@ static 
void fx_4_parse_objects(struct fx_parser *parser) - - for (i = 0; i < parser->object_count; ++i) - { -+ if (parser->failed) -+ return; -+ - fx_parser_read_u32s(parser, &var, sizeof(var)); - fx_parser_read_unstructured(parser, &type, var.type, sizeof(type)); - -@@ -3462,9 +3633,9 @@ static void fx_parse_fx_4_technique(struct fx_parser *parser) - parse_fx_print_indent(parser); - vkd3d_string_buffer_printf(&parser->buffer, "{\n"); - -- if (pass.count) -- fx_parser_error(parser, VKD3D_SHADER_ERROR_FX_NOT_IMPLEMENTED, -- "Parsing pass states is not implemented.\n"); -+ parse_fx_start_indent(parser); -+ fx_4_parse_state_object_initializer(parser, pass.count, HLSL_CLASS_PASS); -+ parse_fx_end_indent(parser); - - parse_fx_print_indent(parser); - vkd3d_string_buffer_printf(&parser->buffer, "}\n\n"); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 836e0ade32a..31cdb0eea76 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -142,7 +142,7 @@ static struct signature_element *vsir_signature_find_element_by_name( - return NULL; - } - --static bool vsir_signature_find_sysval(const struct shader_signature *signature, -+bool vsir_signature_find_sysval(const struct shader_signature *signature, - enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index) - { - const struct signature_element *e; -@@ -1727,8 +1727,34 @@ static void shader_signature_map_patch_constant_index_ranges(struct shader_signa - static int signature_element_register_compare(const void *a, const void *b) - { - const struct signature_element *e = a, *f = b; -+ int ret; -+ -+ if ((ret = vkd3d_u32_compare(e->register_index, f->register_index))) -+ return ret; - -- return vkd3d_u32_compare(e->register_index, f->register_index); -+ /* System values like SV_RenderTargetArrayIndex and SV_ViewPortArrayIndex -+ * can get packed into the same I/O register as non-system values, but -+ * only at the end. 
E.g.: -+ * -+ * vs_4_0 -+ * ... -+ * .output -+ * ... -+ * .param B.x, o1.x, uint -+ * .param C.y, o1.y, uint -+ * .param SV_RenderTargetArrayIndex.z, o1.z, uint, RTINDEX -+ * .text -+ * ... -+ * mov o1.xy, v1.xyxx -+ * mov o1.z, v1.z -+ * ret -+ * -+ * Because I/O normalisation doesn't split writes like the mov to o1.xy -+ * above, we want to make sure that o1.x and o1.y continue to be packed -+ * into a single register after I/O normalisation, so we order system -+ * values after non-system values here, allowing the non-system values to -+ * get merged into a single register. */ -+ return vkd3d_u32_compare(f->sysval_semantic, e->sysval_semantic); - } - - static int signature_element_index_compare(const void *a, const void *b) -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 3fa4d68a48a..74434eda01c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2102,28 +2102,26 @@ static const struct vkd3d_spirv_resource_type - SpvDim dim; - uint32_t arrayed; - uint32_t ms; -- - unsigned int coordinate_component_count; -- unsigned int offset_component_count; - - SpvCapability capability; - SpvCapability uav_capability; - } - vkd3d_spirv_resource_type_table[] = - { -- {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, 0, -+ {VKD3D_SHADER_RESOURCE_BUFFER, SpvDimBuffer, 0, 0, 1, - SpvCapabilitySampledBuffer, SpvCapabilityImageBuffer}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, 1, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_1D, SpvDim1D, 0, 0, 1, - SpvCapabilitySampled1D, SpvCapabilityImage1D}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3, 3}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 0}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, 1, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2}, -+ 
{VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, - SpvCapabilitySampled1D, SpvCapabilityImage1D}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3, 2}, -- {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 0, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3}, -+ {VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, - SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray}, - }; - -@@ -8597,9 +8595,11 @@ static uint32_t spirv_compiler_emit_texel_offset(struct spirv_compiler *compiler - const struct vkd3d_shader_instruction *instruction, - const struct vkd3d_spirv_resource_type *resource_type_info) - { -+ unsigned int component_count = resource_type_info->coordinate_component_count - resource_type_info->arrayed; - const struct vkd3d_shader_texel_offset *offset = &instruction->texel_offset; -- unsigned int component_count = resource_type_info->offset_component_count; - int32_t data[4] = {offset->u, offset->v, offset->w, 0}; -+ -+ VKD3D_ASSERT(resource_type_info->dim != SpvDimCube); - return spirv_compiler_get_constant(compiler, - VKD3D_SHADER_COMPONENT_INT, component_count, (const uint32_t *)data); - } -@@ -8684,9 +8684,9 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - const struct vkd3d_shader_src_param *resource, *sampler; -+ unsigned int image_operand_count = 0, component_count; - uint32_t sampled_type_id, coordinate_id, val_id; - SpvImageOperandsMask operands_mask = 0; -- unsigned int image_operand_count = 0; - struct vkd3d_shader_image image; - 
uint32_t image_operands[3]; - uint32_t coordinate_mask; -@@ -8711,7 +8711,8 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - case VKD3DSIH_SAMPLE_GRAD: - op = SpvOpImageSampleExplicitLod; - operands_mask |= SpvImageOperandsGradMask; -- coordinate_mask = (1u << image.resource_type_info->offset_component_count) - 1; -+ component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed; -+ coordinate_mask = (1u << component_count) - 1; - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, - &src[3], coordinate_mask); - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -@@ -8800,10 +8801,10 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst = instruction->dst; - const struct vkd3d_shader_src_param *src = instruction->src; - unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED; -+ unsigned int component_count, component_idx; - SpvImageOperandsMask operands_mask = 0; - unsigned int image_operand_count = 0; - struct vkd3d_shader_image image; -- unsigned int component_idx; - uint32_t image_operands[1]; - uint32_t coordinate_mask; - bool extended_offset; -@@ -8825,10 +8826,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - - if (offset) - { -+ component_count = image.resource_type_info->coordinate_component_count - image.resource_type_info->arrayed; -+ VKD3D_ASSERT(image.resource_type_info->dim != SpvDimCube); - vkd3d_spirv_enable_capability(builder, SpvCapabilityImageGatherExtended); - operands_mask |= SpvImageOperandsOffsetMask; - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, -- offset, (1u << image.resource_type_info->offset_component_count) - 1); -+ offset, (1u << component_count) - 1); - } - else if (vkd3d_shader_instruction_has_texel_offset(instruction)) - { -diff --git 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index db18e6d12bc..4b492f09bc6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1119,6 +1119,8 @@ static inline bool vsir_sysval_semantic_is_clip_cull(enum vkd3d_shader_sysval_se - - struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature, - unsigned int reg_idx, unsigned int write_mask); -+bool vsir_signature_find_sysval(const struct shader_signature *signature, -+ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index); - void shader_signature_cleanup(struct shader_signature *signature); - - struct dxbc_shader_desc --- -2.45.2 -