From 41367bc540e0bd865f25adb179e232fe15f3428d Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 20 Aug 2024 07:49:47 +1000 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-b23874dad600ec777c0bbe6ecc7aa3f5020.patch | 17461 ++++++++++++++++ ...-c792114a6a58c7c97abf827d154d7ecd22d.patch | 8054 ------- ...-5a53b739959db74e8dcce023a7d49356b90.patch | 1053 - ...-c8cc1b1a2476a4c518756fd7604d37e8c16.patch | 1777 ++ ...-0202393d41f00d8c9f20f59ec080b833b54.patch | 398 - ...-7eb63a7c0d23a83bbdfcfa5ed83b9434370.patch | 153 - ...-947b937a1afc0f1d57b11883dad9ffb3fbd.patch | 1116 - ...-b23874dad600ec777c0bbe6ecc7aa3f5020.patch | 7977 ------- 8 files changed, 19238 insertions(+), 18751 deletions(-) create mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch delete mode 100644 patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch delete mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch create mode 100644 patches/vkd3d-latest/0002-Updated-vkd3d-to-c8cc1b1a2476a4c518756fd7604d37e8c16.patch delete mode 100644 patches/vkd3d-latest/0003-Updated-vkd3d-to-0202393d41f00d8c9f20f59ec080b833b54.patch delete mode 100644 patches/vkd3d-latest/0004-Updated-vkd3d-to-7eb63a7c0d23a83bbdfcfa5ed83b9434370.patch delete mode 100644 patches/vkd3d-latest/0005-Updated-vkd3d-to-947b937a1afc0f1d57b11883dad9ffb3fbd.patch delete mode 100644 patches/vkd3d-latest/0006-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch new file mode 100644 index 00000000..b36ab5e4 --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch @@ -0,0 +1,17461 @@ +From 5c4fa57f02c41cd48f133e49965a1c5c51f428ed Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Thu, 7 Mar 2024 10:40:41 +1100 +Subject: [PATCH] Updated vkd3d to b23874dad600ec777c0bbe6ecc7aa3f5020476d1. 
+ +--- + libs/vkd3d/include/private/vkd3d_common.h | 76 +- + libs/vkd3d/include/vkd3d_shader.h | 241 ++++ + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-common/debug.c | 13 +- + libs/vkd3d/libs/vkd3d-shader/checksum.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 27 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 714 ++++++---- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 6 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 156 ++- + libs/vkd3d/libs/vkd3d-shader/fx.c | 920 ++++++++++-- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 198 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 344 ++++- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 159 ++- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 134 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1150 ++++++++++++--- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1233 ++++++++++++++--- + .../libs/vkd3d-shader/hlsl_constant_ops.c | 110 +- + libs/vkd3d/libs/vkd3d-shader/ir.c | 663 +++++++-- + libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 5 +- + libs/vkd3d/libs/vkd3d-shader/preproc.y | 2 +- + libs/vkd3d/libs/vkd3d-shader/spirv.c | 543 +++++--- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 541 +++++--- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 42 +- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 33 +- + libs/vkd3d/libs/vkd3d/cache.c | 9 +- + libs/vkd3d/libs/vkd3d/command.c | 595 ++++---- + libs/vkd3d/libs/vkd3d/device.c | 190 ++- + libs/vkd3d/libs/vkd3d/resource.c | 56 +- + libs/vkd3d/libs/vkd3d/state.c | 243 +++- + libs/vkd3d/libs/vkd3d/utils.c | 4 +- + libs/vkd3d/libs/vkd3d/vkd3d_main.c | 8 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 12 +- + 33 files changed, 6405 insertions(+), 2027 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index a9d709d10fe..c62dc00415f 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -46,12 +46,22 @@ + + #define STATIC_ASSERT(e) extern void __VKD3D_STATIC_ASSERT__(int [(e) ? 1 : -1]) + ++#define VKD3D_ASSERT(cond) \ ++ do { \ ++ if (!(cond)) \ ++ ERR("Failed assertion: %s\n", #cond); \ ++ } while (0) ++ + #define MEMBER_SIZE(t, m) sizeof(((t *)0)->m) + + #define VKD3D_MAKE_TAG(ch0, ch1, ch2, ch3) \ + ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ + | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) + ++#define VKD3D_EXPAND(x) x ++#define VKD3D_STRINGIFY(x) #x ++#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) ++ + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') + #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') + #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +@@ -98,17 +108,11 @@ static inline uint64_t align(uint64_t addr, size_t alignment) + # define VKD3D_UNREACHABLE (void)0 + #endif /* __GNUC__ */ + +-VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsigned int line) +-{ +- fprintf(stderr, "%s:%u: Aborting, reached unreachable code.\n", filename, line); +- abort(); +-} +- +-#ifdef NDEBUG +-#define vkd3d_unreachable() VKD3D_UNREACHABLE +-#else +-#define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) +-#endif ++#define vkd3d_unreachable() \ ++ do { \ ++ ERR("%s:%u: Unreachable code reached.\n", __FILE__, __LINE__); \ ++ VKD3D_UNREACHABLE; \ ++ } while (0) + + #ifdef VKD3D_NO_TRACE_MESSAGES + #define TRACE(args...) do { } while (0) +@@ -118,11 +122,19 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig + #ifdef VKD3D_NO_DEBUG_MESSAGES + #define WARN(args...) 
do { } while (0) + #define FIXME(args...) do { } while (0) ++#define WARN_ON() (false) ++#define FIXME_ONCE(args...) do { } while (0) ++#endif ++ ++#ifdef VKD3D_NO_ERROR_MESSAGES ++#define ERR(args...) do { } while (0) ++#define MESSAGE(args...) do { } while (0) + #endif + + enum vkd3d_dbg_level + { + VKD3D_DBG_LEVEL_NONE, ++ VKD3D_DBG_LEVEL_MESSAGE, + VKD3D_DBG_LEVEL_ERR, + VKD3D_DBG_LEVEL_FIXME, + VKD3D_DBG_LEVEL_WARN, +@@ -143,7 +155,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + #define VKD3D_DBG_LOG(level) \ + do { \ + const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ +- VKD3D_DBG_PRINTF ++ VKD3D_DBG_PRINTF_##level + + #define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ + do { \ +@@ -151,24 +163,50 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ + ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ + vkd3d_dbg_next_time = true; \ +- VKD3D_DBG_PRINTF ++ VKD3D_DBG_PRINTF_##level + + #define VKD3D_DBG_PRINTF(...) \ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) + ++#define VKD3D_DBG_PRINTF_TRACE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) ++#define VKD3D_DBG_PRINTF_WARN(...) VKD3D_DBG_PRINTF(__VA_ARGS__) ++#define VKD3D_DBG_PRINTF_FIXME(...) VKD3D_DBG_PRINTF(__VA_ARGS__) ++#define VKD3D_DBG_PRINTF_MESSAGE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) ++ ++#ifdef VKD3D_ABORT_ON_ERR ++#define VKD3D_DBG_PRINTF_ERR(...) \ ++ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ ++ abort(); \ ++ } while (0) ++#else ++#define VKD3D_DBG_PRINTF_ERR(...) VKD3D_DBG_PRINTF(__VA_ARGS__) ++#endif ++ ++/* Used by vkd3d_unreachable(). */ ++#ifdef VKD3D_CROSSTEST ++#undef ERR ++#define ERR(...) do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) ++#endif ++ + #ifndef TRACE +-#define TRACE VKD3D_DBG_LOG(TRACE) ++#define TRACE VKD3D_DBG_LOG(TRACE) + #endif + + #ifndef WARN +-#define WARN VKD3D_DBG_LOG(WARN) ++#define WARN VKD3D_DBG_LOG(WARN) + #endif + + #ifndef FIXME +-#define FIXME VKD3D_DBG_LOG(FIXME) ++#define FIXME VKD3D_DBG_LOG(FIXME) + #endif + +-#define ERR VKD3D_DBG_LOG(ERR) ++#ifndef ERR ++#define ERR VKD3D_DBG_LOG(ERR) ++#endif ++ ++#ifndef MESSAGE ++#define MESSAGE VKD3D_DBG_LOG(MESSAGE) ++#endif + + #ifndef TRACE_ON + #define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) +@@ -178,7 +216,9 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + #define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) + #endif + ++#ifndef FIXME_ONCE + #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) ++#endif + + #define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name + +@@ -233,7 +273,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index d3afcc11b16..d4756810065 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -105,6 +105,11 @@ enum vkd3d_shader_structure_type + * \since 1.10 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO, ++ /** ++ * The structure is a vkd3d_shader_parameter_info structure. 
++ * \since 1.13 ++ */ ++ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), + }; +@@ -453,44 +458,191 @@ enum vkd3d_shader_binding_flag + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), + }; + ++/** ++ * The manner in which a parameter value is provided to the shader, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + enum vkd3d_shader_parameter_type + { + VKD3D_SHADER_PARAMETER_TYPE_UNKNOWN, ++ /** The parameter value is embedded directly in the shader. */ + VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, ++ /** ++ * The parameter value is provided to the shader via a specialization ++ * constant. This value is only supported for the SPIR-V target type. ++ */ + VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, ++ /** ++ * The parameter value is provided to the shader as part of a uniform ++ * buffer. ++ * ++ * \since 1.13 ++ */ ++ VKD3D_SHADER_PARAMETER_TYPE_BUFFER, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_TYPE), + }; + ++/** ++ * The format of data provided to the shader, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + enum vkd3d_shader_parameter_data_type + { + VKD3D_SHADER_PARAMETER_DATA_TYPE_UNKNOWN, ++ /** The parameter is provided as a 32-bit unsigned integer. */ + VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, ++ /** The parameter is provided as a 32-bit float. \since 1.13 */ ++ VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), + }; + ++/** ++ * Names a specific shader parameter, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + enum vkd3d_shader_parameter_name + { + VKD3D_SHADER_PARAMETER_NAME_UNKNOWN, ++ /** ++ * The sample count of the framebuffer, as returned by the HLSL function ++ * GetRenderTargetSampleCount() or the GLSL builtin gl_NumSamples. ++ * ++ * This parameter should be specified when compiling to SPIR-V, which ++ * provides no builtin ability to query this information from the shader. ++ * ++ * The default value is 1. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ */ + VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, ++ /** ++ * Alpha test comparison function. When this parameter is provided, if the ++ * alpha component of the pixel shader colour output at location 0 fails the ++ * test, as defined by this function and the reference value provided by ++ * VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, the fragment will be ++ * discarded. ++ * ++ * This parameter, along with VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, ++ * can be used to implement fixed function alpha test, as present in ++ * Direct3D versions up to 9, if the target environment does not support ++ * alpha test as part of its own fixed-function API (as Vulkan and core ++ * OpenGL). ++ * ++ * The default value is VKD3D_SHADER_COMPARISON_FUNC_ALWAYS. ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. The value specified must be ++ * a member of enum vkd3d_shader_comparison_func. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.13 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC, ++ /** ++ * Alpha test reference value. ++ * See VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC for documentation of ++ * alpha test. ++ * ++ * The default value is zero. 
++ * ++ * \since 1.13 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, ++ /** ++ * Whether to use flat interpolation for fragment shader colour inputs. ++ * If the value is nonzero, inputs whose semantic usage is COLOR will use ++ * flat interpolation instead of linear. ++ * This parameter is ignored if the shader model is 4 or greater, since only ++ * shader model 3 and below do not specify the interpolation mode in the ++ * shader bytecode. ++ * ++ * This parameter can be used to implement fixed function shade mode, as ++ * present in Direct3D versions up to 9, if the target environment does not ++ * support shade mode as part of its own fixed-function API (as Vulkan and ++ * core OpenGL). ++ * ++ * The data type for this parameter must be ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ * ++ * The default value is zero, i.e. use linear interpolation. ++ * ++ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this ++ * version of vkd3d-shader. ++ * ++ * \since 1.13 ++ */ ++ VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), + }; + ++/** ++ * The value of an immediate constant parameter, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + struct vkd3d_shader_parameter_immediate_constant + { + union + { ++ /** ++ * The value if the parameter's data type is ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. ++ */ + uint32_t u32; ++ /** ++ * The value if the parameter's data type is ++ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. ++ * ++ * \since 1.13 ++ */ ++ float f32; + } u; + }; + ++/** ++ * The linkage of a specialization constant parameter, used in ++ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. ++ */ + struct vkd3d_shader_parameter_specialization_constant + { ++ /** The ID of the specialization constant. */ + uint32_t id; + }; + ++/** ++ * The linkage of a parameter specified through a uniform buffer, used in ++ * struct vkd3d_shader_parameter1. ++ */ ++struct vkd3d_shader_parameter_buffer ++{ ++ /** ++ * The set of the uniform buffer descriptor. If the target environment does ++ * not support descriptor sets, this value must be set to 0. ++ */ ++ unsigned int set; ++ /** The binding index of the uniform buffer descriptor. */ ++ unsigned int binding; ++ /** The byte offset of the parameter within the buffer. */ ++ uint32_t offset; ++}; ++ ++/** ++ * An individual shader parameter. ++ * ++ * This structure is an earlier version of struct vkd3d_shader_parameter1 ++ * which supports fewer parameter types; ++ * refer to that structure for usage information. ++ * ++ * Only the following types may be used with this structure: ++ * ++ * - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT ++ * - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT ++ */ + struct vkd3d_shader_parameter + { + enum vkd3d_shader_parameter_name name; +@@ -503,6 +655,56 @@ struct vkd3d_shader_parameter + } u; + }; + ++/** ++ * An individual shader parameter. ++ * ++ * This structure is used in struct vkd3d_shader_parameter_info; see there for ++ * explanation of shader parameters. 
++ * ++ * For example, to specify the rasterizer sample count to the shader via an ++ * unsigned integer specialization constant with ID 3, ++ * set the following members: ++ * ++ * - \a name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT ++ * - \a type = VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT ++ * - \a data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32 ++ * - \a u.specialization_constant.id = 3 ++ * ++ * This structure is an extended version of struct vkd3d_shader_parameter. ++ */ ++struct vkd3d_shader_parameter1 ++{ ++ /** The builtin parameter to be mapped. */ ++ enum vkd3d_shader_parameter_name name; ++ /** How the parameter will be provided to the shader. */ ++ enum vkd3d_shader_parameter_type type; ++ /** ++ * The data type of the supplied parameter, which determines how it is to ++ * be interpreted. ++ */ ++ enum vkd3d_shader_parameter_data_type data_type; ++ union ++ { ++ /** ++ * Additional information if \a type is ++ * VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT. ++ */ ++ struct vkd3d_shader_parameter_immediate_constant immediate_constant; ++ /** ++ * Additional information if \a type is ++ * VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT. ++ */ ++ struct vkd3d_shader_parameter_specialization_constant specialization_constant; ++ /** ++ * Additional information if \a type is ++ * VKD3D_SHADER_PARAMETER_TYPE_BUFFER. ++ */ ++ struct vkd3d_shader_parameter_buffer buffer; ++ void *_pointer_pad; ++ uint32_t _pad[4]; ++ } u; ++}; ++ + /** + * Symbolic register indices for mapping uniform constant register sets in + * legacy Direct3D bytecode to constant buffer views in the target environment. +@@ -1994,6 +2196,44 @@ struct vkd3d_shader_varying_map_info + unsigned int varying_count; + }; + ++/** ++ * Interface information regarding a builtin shader parameter. ++ * ++ * Like compile options specified with struct vkd3d_shader_compile_option, ++ * parameters are used to specify certain values which are not part of the ++ * source shader bytecode but which need to be specified in the shader bytecode ++ * in the target format. ++ * Unlike struct vkd3d_shader_compile_option, however, this structure allows ++ * parameters to be specified in a variety of different ways, as described by ++ * enum vkd3d_shader_parameter_type. ++ * ++ * This structure is an extended version of struct vkd3d_shader_parameter as ++ * used in struct vkd3d_shader_spirv_target_info, which allows more parameter ++ * types to be used, and also allows specifying parameters when compiling ++ * shaders to target types other than SPIR-V. If this structure is chained ++ * along with vkd3d_shader_spirv_target_info, any parameters specified in the ++ * latter structure are ignored. ++ * ++ * This structure is passed to vkd3d_shader_compile() and extends ++ * vkd3d_shader_compile_info. ++ * ++ * This structure contains only input parameters. ++ * ++ * \since 1.13 ++ */ ++struct vkd3d_shader_parameter_info ++{ ++ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO. */ ++ enum vkd3d_shader_structure_type type; ++ /** Optional pointer to a structure containing further parameters. */ ++ const void *next; ++ ++ /** Pointer to an array of dynamic parameters for this shader instance. */ ++ const struct vkd3d_shader_parameter1 *parameters; ++ /** Size, in elements, of \ref parameters. 
*/ ++ unsigned int parameter_count; ++}; ++ + #ifdef LIBVKD3D_SHADER_SOURCE + # define VKD3D_SHADER_API VKD3D_EXPORT + #else +@@ -2077,6 +2317,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported + * - vkd3d_shader_descriptor_offset_info + * - vkd3d_shader_hlsl_source_info + * - vkd3d_shader_interface_info ++ * - vkd3d_shader_parameter_info + * - vkd3d_shader_preprocess_info + * - vkd3d_shader_scan_combined_resource_sampler_info + * - vkd3d_shader_scan_descriptor_info +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index f60ef7db769..c2c6ad67804 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -20,6 +20,7 @@ + #define WIDL_C_INLINE_WRAPPERS + #endif + #define COBJMACROS ++ + #define CONST_VTABLE + #include "vkd3d.h" + #include "vkd3d_blob.h" +diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c +index 4523fc997ef..4bfc19bd9a1 100644 +--- a/libs/vkd3d/libs/vkd3d-common/debug.c ++++ b/libs/vkd3d/libs/vkd3d-common/debug.c +@@ -45,11 +45,12 @@ extern const char *const vkd3d_dbg_env_name; + + static const char *const debug_level_names[] = + { +- [VKD3D_DBG_LEVEL_NONE ] = "none", +- [VKD3D_DBG_LEVEL_ERR ] = "err", +- [VKD3D_DBG_LEVEL_FIXME] = "fixme", +- [VKD3D_DBG_LEVEL_WARN ] = "warn", +- [VKD3D_DBG_LEVEL_TRACE] = "trace", ++ [VKD3D_DBG_LEVEL_NONE ] = "none", ++ [VKD3D_DBG_LEVEL_MESSAGE] = "message", ++ [VKD3D_DBG_LEVEL_ERR ] = "err", ++ [VKD3D_DBG_LEVEL_FIXME] = "fixme", ++ [VKD3D_DBG_LEVEL_WARN ] = "warn", ++ [VKD3D_DBG_LEVEL_TRACE] = "trace", + }; + + enum vkd3d_dbg_level vkd3d_dbg_get_level(void) +@@ -104,8 +105,6 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch + if (vkd3d_dbg_get_level() < level) + return; + +- assert(level < ARRAY_SIZE(debug_level_names)); +- + #ifdef _WIN32 + vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); + #elif HAVE_GETTID +diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c +index 0910729a0e9..d9560628c77 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/checksum.c ++++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c +@@ -288,7 +288,7 @@ void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksu + const uint8_t *ptr = dxbc; + struct md5_ctx ctx; + +- assert(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); ++ VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); + ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; + size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 9abc2c4db70..2c2f0c43ece 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = + [VKD3DSIH_PHASE ] = "phase", + [VKD3DSIH_PHI ] = "phi", + [VKD3DSIH_POW ] = "pow", ++ [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", ++ [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", ++ [VKD3DSIH_QUAD_READ_ACROSS_Y ] = "quad_read_across_y", ++ [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", + [VKD3DSIH_RCP ] = "rcp", + [VKD3DSIH_REP ] = "rep", + [VKD3DSIH_RESINFO ] = "resinfo", +@@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const + { + bool untyped = false; + +- switch (compiler->current->handler_idx) ++ switch (compiler->current->opcode) + { + case VKD3DSIH_MOV: + case VKD3DSIH_MOVC: 
+@@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + { + struct vkd3d_string_buffer *buffer = &compiler->buffer; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_BREAKP: + case VKD3DSIH_CONTINUEP: +@@ -1853,8 +1857,13 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile + break; + + case VKD3DSIH_TEX: +- if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) +- vkd3d_string_buffer_printf(buffer, "p"); ++ if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) ++ { ++ if (ins->flags & VKD3DSI_TEXLD_PROJECT) ++ vkd3d_string_buffer_printf(buffer, "p"); ++ else if (ins->flags & VKD3DSI_TEXLD_BIAS) ++ vkd3d_string_buffer_printf(buffer, "b"); ++ } + break; + + case VKD3DSIH_WAVE_OP_ADD: +@@ -1910,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, + } + else + { +- assert(icb->component_count == VKD3D_VEC4_SIZE); ++ VKD3D_ASSERT(icb->component_count == VKD3D_VEC4_SIZE); + for (i = 0; i < icb->element_count; ++i) + { + shader_print_hex_literal(compiler, " {", icb->data[4 * i + 0], ""); +@@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, + if (ins->coissue) + vkd3d_string_buffer_printf(buffer, "+"); + +- shader_print_opcode(compiler, ins->handler_idx); ++ shader_print_opcode(compiler, ins->opcode); + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL: + case VKD3DSIH_DCL_UAV_TYPED: +@@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_ENDIF: +@@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, + + shader_dump_instruction(&compiler, ins); + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_ELSE: + case VKD3DSIH_IF: +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index bfd5b52b436..a4c038a233a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -757,7 +757,7 @@ static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, + { + /* d3d shaders have a maximum of 8192 constants; we should not overrun + * this array. 
*/ +- assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); ++ VKD3D_ASSERT((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); + bitmap_set(sm1->constants[set].def_mask, index); + } + } +@@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) + + static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) + { +- if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) ++ if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) + { + vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, + "Ignoring unexpected instruction flags %#x.", ins->flags); +@@ -1142,23 +1142,23 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + goto fail; + } + +- if (ins->handler_idx == VKD3DSIH_DCL) ++ if (ins->opcode == VKD3DSIH_DCL) + { + shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); + } +- else if (ins->handler_idx == VKD3DSIH_DEF) ++ else if (ins->opcode == VKD3DSIH_DEF) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } +- else if (ins->handler_idx == VKD3DSIH_DEFB) ++ else if (ins->opcode == VKD3DSIH_DEFB) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); + } +- else if (ins->handler_idx == VKD3DSIH_DEFI) ++ else if (ins->opcode == VKD3DSIH_DEFI) + { + shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); +@@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str + return; + + fail: +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + *ptr = sm1->end; + } + +@@ -1272,7 +1272,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st + sm1->end = &code[token_count]; + + /* Estimate instruction count to avoid reallocation in most shaders. */ +- if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) ++ if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); +@@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + ins = &instructions->elements[instructions->count]; + shader_sm1_read_instruction(&sm1, ins); + +- if (ins->handler_idx == VKD3DSIH_INVALID) ++ if (ins->opcode == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + vsir_program_cleanup(program); +@@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c + return ret; + } + +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) ++bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) + { + unsigned int i; + +@@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + bool output; + enum vkd3d_shader_type shader_type; + unsigned int major_version; +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + unsigned int offset; + } + register_table[] = + { +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, +- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, +- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, +- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, +- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, +- +- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, +- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, +- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, ++ ++ {"color", true, 
VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, ++ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, ++ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, ++ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, ++ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, ++ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) + { +- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) ++ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) + && output == register_table[i].output +- && ctx->profile->type == register_table[i].shader_type +- && ctx->profile->major_version == register_table[i].major_version) ++ && version->type == register_table[i].shader_type ++ && version->major == register_table[i].major_version) + { + *type = register_table[i].type; +- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) ++ if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) + *reg = register_table[i].offset; + else +- *reg = semantic->index; ++ *reg = semantic_index; + return true; + } + } +@@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + return false; + } + +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) ++bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) + { + static const struct + { +@@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { +- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) ++ if (!ascii_strcasecmp(semantic_name, semantics[i].name)) + { + *usage = semantics[i].usage; +- *usage_idx = semantic->index; ++ *usage_idx = semantic_index; + return true; + } + } +@@ -1465,6 +1466,17 @@ bool 
hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU + return false; + } + ++struct d3dbc_compiler ++{ ++ struct vsir_program *program; ++ struct vkd3d_bytecode_buffer buffer; ++ struct vkd3d_shader_message_context *message_context; ++ ++ /* OBJECTIVE: Store all the required information in the other fields so ++ * that this hlsl_ctx is no longer necessary. */ ++ struct hlsl_ctx *ctx; ++}; ++ + static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) + { + if (type == VKD3D_SHADER_TYPE_VERTEX) +@@ -1480,7 +1492,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + case HLSL_CLASS_ARRAY: + return hlsl_sm1_class(type->e.array.type); + case HLSL_CLASS_MATRIX: +- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3DXPC_MATRIX_COLUMNS; + else +@@ -1497,13 +1509,20 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPC_OBJECT; ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + break; + } + +@@ -1593,13 +1612,20 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + case HLSL_CLASS_VERTEX_SHADER: + return D3DXPT_VERTEXSHADER; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + break; + } + +@@ -1677,8 +1703,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) + list_move_tail(&ctx->extern_vars, &sorted); + } + +-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- struct hlsl_ir_function_decl *entry_func) ++void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) + { + size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; + unsigned int uniform_count = 0; +@@ -1739,11 +1764,11 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + } + else + { +- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); ++ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); + put_u32(buffer, var->bind_count[r]); + } + put_u32(buffer, 0); /* type */ +- put_u32(buffer, 0); /* FIXME: default value */ ++ put_u32(buffer, 0); /* default value */ + } + } + +@@ -1767,6 +1792,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + + write_sm1_type(buffer, var->data_type, ctab_start); + set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); ++ ++ if (var->default_values) ++ { ++ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int 
comp_count = hlsl_type_component_count(var->data_type); ++ unsigned int default_value_offset; ++ unsigned int k; ++ ++ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); ++ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); ++ ++ for (k = 0; k < comp_count; ++k) ++ { ++ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ unsigned int comp_offset; ++ enum hlsl_regset regset; ++ ++ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ union ++ { ++ uint32_t u; ++ float f; ++ } uni; ++ ++ switch (comp_type->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ hlsl_fixme(ctx, &var->loc, "Write double default values."); ++ uni.u = 0; ++ break; ++ ++ case HLSL_TYPE_INT: ++ uni.f = var->default_values[k].value.i; ++ break; ++ ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ uni.f = var->default_values[k].value.u; ++ break; ++ ++ case HLSL_TYPE_HALF: ++ case HLSL_TYPE_FLOAT: ++ uni.u = var->default_values[k].value.u; ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); ++ } ++ } ++ } ++ + ++uniform_count; + } + } +@@ -1778,7 +1859,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); + } + +-static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) ++static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) + { + return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) + | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); +@@ -1791,7 +1872,7 @@ struct sm1_instruction + + struct sm1_dst_register + { +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + D3DSHADER_PARAM_DSTMOD_TYPE mod; + unsigned int writemask; + uint32_t reg; +@@ -1799,19 +1880,45 @@ struct sm1_instruction + + struct sm1_src_register + { +- D3DSHADER_PARAM_REGISTER_TYPE type; ++ enum vkd3d_shader_register_type type; + D3DSHADER_PARAM_SRCMOD_TYPE mod; + unsigned int swizzle; + uint32_t reg; +- } srcs[3]; ++ } srcs[4]; + unsigned int src_count; + + unsigned int has_dst; + }; + ++static bool is_inconsequential_instr(const struct sm1_instruction *instr) ++{ ++ const struct sm1_src_register *src = &instr->srcs[0]; ++ const struct sm1_dst_register *dst = &instr->dst; ++ unsigned int i; ++ ++ if (instr->opcode != D3DSIO_MOV) ++ return false; ++ if (dst->mod != D3DSPDM_NONE) ++ return false; ++ if (src->mod != D3DSPSM_NONE) ++ return false; ++ if (src->type != dst->type) ++ return false; ++ if (src->reg != dst->reg) ++ return false; ++ ++ for (i = 0; i < 4; ++i) ++ { ++ if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) ++ return false; ++ } ++ ++ return true; ++} ++ + static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) + { +- assert(reg->writemask); ++ VKD3D_ASSERT(reg->writemask); + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); + } + +@@ -1821,15 +1928,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, + put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); + } + +-static void write_sm1_instruction(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, +- const struct sm1_instruction *instr) ++static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + uint32_t token = instr->opcode; + unsigned int i; + ++ if (is_inconsequential_instr(instr)) ++ return; ++ + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); + +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -1845,54 +1956,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w + src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); + } + +-static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, +- const struct hlsl_reg *src3) ++static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) + { + struct sm1_instruction instr = + { + .opcode = D3DSIO_DP2ADD, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].type = VKD3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, + }; + +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, +- const struct hlsl_reg *src2, const struct hlsl_reg *src3) ++static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, ++ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, ++ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, +- .srcs[2].type = D3DSPR_TEMP, ++ .srcs[2].type = VKD3DSPR_TEMP, + .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), + .srcs[2].reg = src3->id, + .src_count = 3, +@@ -1901,26 +2011,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); +- 
write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, +@@ -1928,49 +2037,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); + sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src1, const struct hlsl_reg *src2) ++static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), + .srcs[0].reg = src1->id, +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), + .srcs[1].reg = src2->id, + .src_count = 2, + }; + +- write_sm1_instruction(ctx, buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, +- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) ++static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src, ++ D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) + { + struct sm1_instruction instr = + { + .opcode = opcode, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.mod = dst_mod, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .srcs[0].mod = src_mod, +@@ -1978,19 +2086,19 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe + }; + + sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); +- write_sm1_instruction(ctx, 
buffer, &instr); ++ d3dbc_write_instruction(d3dbc, &instr); + } + +-static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + const struct hlsl_ir_node *arg1 = expr->operands[0].node; + const struct hlsl_type *dst_type = expr->node.data_type; + const struct hlsl_type *src_type = arg1->data_type; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + + /* Narrowing casts were already lowered. */ +- assert(src_type->dimx == dst_type->dimx); ++ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + + switch (dst_type->e.numeric.type) + { +@@ -2004,7 +2112,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_TYPE_DOUBLE: +@@ -2028,7 +2136,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + break; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_TYPE_BOOL: +@@ -2057,8 +2165,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + +-static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + unsigned int i, x; + + for (i = 0; i < ctx->constant_defs.count; ++i) +@@ -2067,12 +2178,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + uint32_t token = D3DSIO_DEF; + const struct sm1_dst_register reg = + { +- .type = D3DSPR_CONST, ++ .type = VKD3DSPR_CONST, + .writemask = VKD3DSP_WRITEMASK_ALL, + .reg = constant_reg->index, + }; + +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 5 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2082,32 +2193,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + } + } + +-static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_var *var, bool output) ++static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, ++ const struct signature_element *element, bool output) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + uint32_t token, usage_idx; + D3DDECLUSAGE usage; + bool ret; + +- if ((!output && !var->last_read) || (output && !var->first_write)) +- return; +- +- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) ++ if (hlsl_sm1_register_from_semantic(version, element->semantic_name, ++ element->semantic_index, output, ®.type, ®.reg)) + { + usage = 0; + usage_idx = 0; + } + else + { +- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); +- assert(ret); +- 
reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; +- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; ++ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); ++ VKD3D_ASSERT(ret); ++ reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; ++ reg.reg = element->register_index; + } + + token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2116,39 +2227,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; + put_u32(buffer, token); + +- reg.writemask = (1 << var->data_type->dimx) - 1; ++ reg.writemask = element->mask; + write_sm1_dst_register(buffer, ®); + } + +-static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) + { ++ struct vsir_program *program = d3dbc->program; ++ const struct vkd3d_shader_version *version; + bool write_in = false, write_out = false; +- struct hlsl_ir_var *var; + +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) ++ version = &program->shader_version; ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) + write_in = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) ++ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) + write_in = write_out = true; +- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) ++ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) + write_in = true; + +- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ if (write_in) + { +- if (write_in && var->is_input_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, false); +- if (write_out && var->is_output_semantic) +- write_sm1_semantic_dcl(ctx, buffer, var, true); ++ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) ++ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); ++ } ++ ++ if (write_out) ++ { ++ for (unsigned int i = 0; i < program->output_signature.element_count; ++i) ++ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); + } + } + +-static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, + unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; + uint32_t token, res_type = 0; + + token = D3DSIO_DCL; +- if (ctx->profile->major_version > 1) ++ if (version->major > 1) + token |= 2 << D3DSI_INSTLENGTH_SHIFT; + put_u32(buffer, token); + +@@ -2175,20 +2294,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu + token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; + put_u32(buffer, token); + +- reg.type = D3DSPR_SAMPLER; ++ reg.type = VKD3DSPR_COMBINED_SAMPLER; + reg.writemask = VKD3DSP_WRITEMASK_ALL; + reg.reg = reg_id; + + write_sm1_dst_register(buffer, ®); + } + +-static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) ++static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) + { ++ const 
struct vkd3d_shader_version *version = &d3dbc->program->shader_version; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + enum hlsl_sampler_dim sampler_dim; + unsigned int i, count, reg_id; + struct hlsl_ir_var *var; + +- if (ctx->profile->major_version < 2) ++ if (version->major < 2) + return; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) +@@ -2210,39 +2331,38 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b + continue; + } + +- reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; +- write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); ++ reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; ++ d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); + } + } + } + } + +-static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_CONST, ++ .srcs[0].type = VKD3DSPR_CONST, + .srcs[0].reg = constant->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), + .src_count = 1, + }; + +- assert(instr->reg.allocated); +- assert(constant->reg.allocated); ++ VKD3D_ASSERT(instr->reg.allocated); ++ VKD3D_ASSERT(constant->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, ++static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, + const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) + { + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); +@@ -2255,28 +2375,69 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ + + src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); + dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); +- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); ++ d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); + } + } + +-static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, ++ const struct hlsl_reg *dst, const struct hlsl_reg *src) + { ++ struct sm1_instruction instr = ++ { ++ .opcode = D3DSIO_SINCOS, ++ ++ .dst.type = VKD3DSPR_TEMP, ++ .dst.writemask = dst->writemask, ++ .dst.reg = dst->id, ++ .has_dst = 1, ++ ++ .srcs[0].type = VKD3DSPR_TEMP, ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), ++ .srcs[0].reg = src->id, ++ .src_count = 1, ++ }; ++ ++ if (op == HLSL_OP1_COS_REDUCED) ++ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); ++ else /* HLSL_OP1_SIN_REDUCED */ ++ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); ++ ++ if (d3dbc->ctx->profile->major_version < 3) ++ { ++ instr.src_count = 3; ++ ++ instr.srcs[1].type = VKD3DSPR_CONST; ++ instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); ++ instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; ++ ++ instr.srcs[2].type = VKD3DSPR_CONST; 
++ instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); ++ instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; ++ } ++ ++ d3dbc_write_instruction(d3dbc, &instr); ++} ++ ++static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) ++{ ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); + struct hlsl_ir_node *arg1 = expr->operands[0].node; + struct hlsl_ir_node *arg2 = expr->operands[1].node; + struct hlsl_ir_node *arg3 = expr->operands[2].node; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + +- assert(instr->reg.allocated); ++ VKD3D_ASSERT(instr->reg.allocated); + + if (expr->op == HLSL_OP1_REINTERPRET) + { +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + return; + } + + if (expr->op == HLSL_OP1_CAST) + { +- write_sm1_cast(ctx, buffer, instr); ++ d3dbc_write_cast(d3dbc, instr); + return; + } + +@@ -2290,70 +2451,75 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + switch (expr->op) + { + case HLSL_OP1_ABS: +- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSX: +- write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_DSY: +- write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + break; + + case HLSL_OP1_EXP2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); + break; + + case HLSL_OP1_LOG2: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); + break; + + case HLSL_OP1_NEG: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + break; + + case HLSL_OP1_SAT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + break; + + case HLSL_OP1_RCP: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); + break; + + case HLSL_OP1_RSQ: +- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); ++ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); ++ break; ++ ++ case HLSL_OP1_COS_REDUCED: ++ case HLSL_OP1_SIN_REDUCED: ++ d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); + break; + + case HLSL_OP2_ADD: +- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MAX: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_MIN: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + 
+ case HLSL_OP2_MUL: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP1_FRACT: +- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); ++ d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + break; + + case HLSL_OP2_DOT: + switch (arg1->data_type->dimx) + { + case 4: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case 3: +- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + break; + + default: +@@ -2362,27 +2528,31 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + break; + + case HLSL_OP2_LOGIC_AND: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_LOGIC_OR: +- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP2_SLT: +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL) + hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); +- write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); ++ d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + break; + + case HLSL_OP3_CMP: +- if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) ++ if (version->type == VKD3D_SHADER_TYPE_VERTEX) + hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); +- write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + case HLSL_OP3_DP2ADD: +- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); ++ break; ++ ++ case HLSL_OP3_MAD: ++ d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + default: +@@ -2391,50 +2561,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + } + +-static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block); ++static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); + +-static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_if *iff = hlsl_ir_if(instr); + const struct hlsl_ir_node *condition; + struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; + + condition = iff->condition.node; +- assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); ++ VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + + sm1_ifc = (struct sm1_instruction) + { + .opcode = D3DSIO_IFC, + .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[0].reg = condition->reg.id, + .srcs[0].mod = 0, + +- .srcs[1].type = D3DSPR_TEMP, ++ .srcs[1].type = VKD3DSPR_TEMP, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), + .srcs[1].reg = condition->reg.id, + .srcs[1].mod = D3DSPSM_NEG, + + .src_count = 2, + }; +- write_sm1_instruction(ctx, buffer, &sm1_ifc); +- write_sm1_block(ctx, buffer, &iff->then_block); ++ d3dbc_write_instruction(d3dbc, &sm1_ifc); ++ d3dbc_write_block(d3dbc, &iff->then_block); + + if (!list_empty(&iff->else_block.instrs)) + { + sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; +- write_sm1_instruction(ctx, buffer, &sm1_else); +- write_sm1_block(ctx, buffer, &iff->else_block); ++ d3dbc_write_instruction(d3dbc, &sm1_else); ++ d3dbc_write_block(d3dbc, &iff->else_block); + } + + sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; +- write_sm1_instruction(ctx, buffer, &sm1_endif); ++ d3dbc_write_instruction(d3dbc, &sm1_endif); + } + +-static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + +@@ -2448,54 +2617,55 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + { + .opcode = D3DSIO_TEXKILL, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = reg->id, + .dst.writemask = reg->writemask, + .has_dst = 1, + }; + +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + break; + } + + default: +- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); ++ hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + } + } + +-static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) ++static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), + .src_count = 1, + }; + +- assert(instr->reg.allocated); ++ VKD3D_ASSERT(instr->reg.allocated); + + if (load->src.var->is_uniform) + { +- assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_CONST; ++ VKD3D_ASSERT(reg.allocated); ++ sm1_instr.srcs[0].type = VKD3DSPR_CONST; + } + else if (load->src.var->is_input_semantic) + { +- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, +- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) ++ if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, ++ load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + { +- assert(reg.allocated); +- sm1_instr.srcs[0].type = D3DSPR_INPUT; ++ VKD3D_ASSERT(reg.allocated); ++ sm1_instr.srcs[0].type = VKD3DSPR_INPUT; + 
sm1_instr.srcs[0].reg = reg.id; + } + else +@@ -2503,32 +2673,34 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + struct hlsl_ir_node *coords = load->coords.node; ++ struct hlsl_ir_node *ddx = load->ddx.node; ++ struct hlsl_ir_node *ddy = load->ddy.node; + unsigned int sampler_offset, reg_id; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + struct sm1_instruction sm1_instr; + + sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); +- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; ++ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; + + sm1_instr = (struct sm1_instruction) + { +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = coords->reg.id, +- .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), ++ .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), + +- .srcs[1].type = D3DSPR_SAMPLER, ++ .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, + .srcs[1].reg = reg_id, + .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + +@@ -2546,69 +2718,82 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ + sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + break; + ++ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: ++ sm1_instr.opcode = D3DSIO_TEX; ++ sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; ++ break; ++ ++ case HLSL_RESOURCE_SAMPLE_GRAD: ++ sm1_instr.opcode = D3DSIO_TEXLDD; ++ ++ sm1_instr.srcs[2].type = VKD3DSPR_TEMP; ++ sm1_instr.srcs[2].reg = ddx->reg.id; ++ sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); ++ ++ sm1_instr.srcs[3].type = VKD3DSPR_TEMP; ++ sm1_instr.srcs[3].reg = ddy->reg.id; ++ sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); ++ ++ sm1_instr.src_count += 2; ++ break; ++ + default: + hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); + return; + } + +- assert(instr->reg.allocated); ++ VKD3D_ASSERT(instr->reg.allocated); + +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { ++ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + const struct hlsl_ir_store *store = hlsl_ir_store(instr); +- const struct hlsl_ir_node *rhs = store->rhs.node; ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); ++ const struct hlsl_ir_node *rhs = store->rhs.node; + struct sm1_instruction sm1_instr = + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = 
VKD3DSPR_TEMP, + .dst.reg = reg.id, + .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = rhs->reg.id, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), + .src_count = 1, + }; + +- if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) +- { +- hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); +- return; +- } +- + if (store->lhs.var->is_output_semantic) + { +- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) ++ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) + { +- sm1_instr.dst.type = D3DSPR_TEMP; ++ sm1_instr.dst.type = VKD3DSPR_TEMP; + sm1_instr.dst.reg = 0; + } +- else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, +- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) ++ else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, ++ store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + { +- assert(reg.allocated); +- sm1_instr.dst.type = D3DSPR_OUTPUT; ++ VKD3D_ASSERT(reg.allocated); ++ sm1_instr.dst.type = VKD3DSPR_OUTPUT; + sm1_instr.dst.reg = reg.id; + } + else + sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; + } + else +- assert(reg.allocated); ++ VKD3D_ASSERT(reg.allocated); + + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_ir_node *instr) ++static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) + { + const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); + const struct hlsl_ir_node *val = swizzle->val.node; +@@ -2616,27 +2801,27 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer + { + .opcode = D3DSIO_MOV, + +- .dst.type = D3DSPR_TEMP, ++ .dst.type = VKD3DSPR_TEMP, + .dst.reg = instr->reg.id, + .dst.writemask = instr->reg.writemask, + .has_dst = 1, + +- .srcs[0].type = D3DSPR_TEMP, ++ .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].reg = val->reg.id, + .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), + swizzle->swizzle, instr->data_type->dimx), + .src_count = 1, + }; + +- assert(instr->reg.allocated); +- assert(val->reg.allocated); ++ VKD3D_ASSERT(instr->reg.allocated); ++ VKD3D_ASSERT(val->reg.allocated); + sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); +- write_sm1_instruction(ctx, buffer, &sm1_instr); ++ d3dbc_write_instruction(d3dbc, &sm1_instr); + } + +-static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +- const struct hlsl_block *block) ++static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) + { ++ struct hlsl_ctx *ctx = d3dbc->ctx; + const struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) +@@ -2656,38 +2841,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: +- write_sm1_constant(ctx, buffer, instr); ++ d3dbc_write_constant(d3dbc, instr); + break; + + case HLSL_IR_EXPR: +- write_sm1_expr(ctx, buffer, instr); ++ d3dbc_write_expr(d3dbc, instr); + break; + + case HLSL_IR_IF: + 
if (hlsl_version_ge(ctx, 2, 1)) +- write_sm1_if(ctx, buffer, instr); ++ d3dbc_write_if(d3dbc, instr); + else + hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); + break; + + case HLSL_IR_JUMP: +- write_sm1_jump(ctx, buffer, instr); ++ d3dbc_write_jump(d3dbc, instr); + break; + + case HLSL_IR_LOAD: +- write_sm1_load(ctx, buffer, instr); ++ d3dbc_write_load(d3dbc, instr); + break; + + case HLSL_IR_RESOURCE_LOAD: +- write_sm1_resource_load(ctx, buffer, instr); ++ d3dbc_write_resource_load(d3dbc, instr); + break; + + case HLSL_IR_STORE: +- write_sm1_store(ctx, buffer, instr); ++ d3dbc_write_store(d3dbc, instr); + break; + + case HLSL_IR_SWIZZLE: +- write_sm1_swizzle(ctx, buffer, instr); ++ d3dbc_write_swizzle(d3dbc, instr); + break; + + default: +@@ -2696,32 +2881,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * + } + } + +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) ++/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving ++ * data from the other parameters instead, so it can be removed as an argument ++ * and be declared in vkd3d_shader_private.h and used without relying on HLSL ++ * IR structs. */ ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { +- struct vkd3d_bytecode_buffer buffer = {0}; ++ const struct vkd3d_shader_version *version = &program->shader_version; ++ struct d3dbc_compiler d3dbc = {0}; ++ struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; ++ ++ d3dbc.ctx = ctx; ++ d3dbc.program = program; ++ d3dbc.message_context = message_context; + +- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); ++ put_u32(buffer, sm1_version(version->type, version->major, version->minor)); + +- write_sm1_uniforms(ctx, &buffer, entry_func); ++ bytecode_put_bytes(buffer, ctab->code, ctab->size); + +- write_sm1_constant_defs(ctx, &buffer); +- write_sm1_semantic_dcls(ctx, &buffer); +- write_sm1_sampler_dcls(ctx, &buffer); +- write_sm1_block(ctx, &buffer, &entry_func->body); ++ d3dbc_write_constant_defs(&d3dbc); ++ d3dbc_write_semantic_dcls(&d3dbc); ++ d3dbc_write_sampler_dcls(&d3dbc); ++ d3dbc_write_block(&d3dbc, &entry_func->body); + +- put_u32(&buffer, D3DSIO_END); ++ put_u32(buffer, D3DSIO_END); + +- if (buffer.status) +- ctx->result = buffer.status; ++ if (buffer->status) ++ ctx->result = buffer->status; + + if (!ctx->result) + { +- out->code = buffer.data; +- out->size = buffer.size; ++ out->code = buffer->data; ++ out->size = buffer->size; + } + else + { +- vkd3d_free(buffer.data); ++ vkd3d_free(buffer->data); + } + return ctx->result; + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index 4b9f67235aa..184788dc57e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -29,7 +29,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void + { + struct vkd3d_shader_dxbc_section_desc *section; + +- assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); ++ VKD3D_ASSERT(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); + + section = &dxbc->sections[dxbc->section_count++]; + section->tag = tag; +@@ -983,7 +983,7 
@@ static int shader_parse_root_signature(const struct vkd3d_shader_code *data, + { + struct vkd3d_shader_root_signature_desc1 *v_1_1 = &desc->u.v_1_1; + +- assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); ++ VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); + + v_1_1->parameter_count = count; + if (v_1_1->parameter_count) +@@ -1777,7 +1777,7 @@ int vkd3d_shader_convert_root_signature(struct vkd3d_shader_versioned_root_signa + } + else + { +- assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); ++ VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); + ret = convert_root_signature_to_v1_1(dst, src); + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 73a8d8687c5..2a0bbe1a625 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -458,6 +458,8 @@ enum dx_intrinsic_opcode + DX_WAVE_ACTIVE_OP = 119, + DX_WAVE_ACTIVE_BIT = 120, + DX_WAVE_PREFIX_OP = 121, ++ DX_QUAD_READ_LANE_AT = 122, ++ DX_QUAD_OP = 123, + DX_LEGACY_F32TOF16 = 130, + DX_LEGACY_F16TOF32 = 131, + DX_WAVE_ALL_BIT_COUNT = 135, +@@ -576,6 +578,13 @@ enum dxil_wave_op_kind + WAVE_OP_MAX = 3, + }; + ++enum dxil_quad_op_kind ++{ ++ QUAD_READ_ACROSS_X = 0, ++ QUAD_READ_ACROSS_Y = 1, ++ QUAD_READ_ACROSS_D = 2, ++}; ++ + struct sm6_pointer_info + { + const struct sm6_type *type; +@@ -932,7 +941,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length + if (!length) + return 0; + +- assert(length < 32); ++ VKD3D_ASSERT(length < 32); + + if (sm6_parser_is_end(sm6)) + { +@@ -940,7 +949,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length + return 0; + } + +- assert(sm6->bitpos < 32); ++ VKD3D_ASSERT(sm6->bitpos < 32); + bits = *sm6->ptr >> sm6->bitpos; + l = 32 - sm6->bitpos; + if (l <= length) +@@ -1199,7 +1208,7 @@ static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) + struct dxil_global_abbrev *global_abbrev; + enum vkd3d_result ret; + +- assert(block->id == BLOCKINFO_BLOCK); ++ VKD3D_ASSERT(block->id == BLOCKINFO_BLOCK); + + if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) + || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) +@@ -1468,7 +1477,7 @@ static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct + if (sm6->abbrevs[i]->block_id == block->id) + block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; + +- assert(abbrev_count == block->abbrev_count); ++ VKD3D_ASSERT(abbrev_count == block->abbrev_count); + } + + if ((ret = dxil_block_read(block, sm6)) < 0) +@@ -1546,7 +1555,7 @@ static char *dxil_record_to_string(const struct dxil_record *record, unsigned in + unsigned int i; + char *str; + +- assert(offset <= record->operand_count); ++ VKD3D_ASSERT(offset <= record->operand_count); + if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) + { + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, +@@ -1834,7 +1843,7 @@ static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) + ++sm6->type_count; + } + +- assert(sm6->type_count == type_count); ++ VKD3D_ASSERT(sm6->type_count == type_count); + + if (struct_name) + { +@@ -2207,13 +2216,13 @@ static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) + + static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) + { +- 
assert(sm6_value_is_function_dcl(fn)); ++ VKD3D_ASSERT(sm6_value_is_function_dcl(fn)); + return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); + } + + static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) + { +- assert(sm6->value_count < sm6->value_capacity); ++ VKD3D_ASSERT(sm6->value_count < sm6->value_capacity); + return &sm6->values[sm6->value_count]; + } + +@@ -3395,7 +3404,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa + enum vkd3d_shader_opcode handler_idx) + { + struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); +- assert(ins); ++ VKD3D_ASSERT(ins); + vsir_instruction_init(ins, &sm6->p.location, handler_idx); + ++sm6->p.program->instructions.count; + return ins; +@@ -3642,7 +3651,7 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init + { + const struct sm6_value *value; + +- assert(index); ++ VKD3D_ASSERT(index); + --index; + if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_icb(value) && !sm6_value_is_undef(value))) + { +@@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) + for (i = 0; i < sm6->p.program->instructions.count; ++i) + { + ins = &sm6->p.program->instructions.elements[i]; +- if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) ++ if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) + { + ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( + (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); + } +- else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) ++ else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) + { + ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); + } +- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) ++ else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) + { + ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } +- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) ++ else if (ins->opcode == VKD3DSIH_DCL_TGSM_STRUCTURED) + { + ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; +@@ -3886,7 +3895,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade + if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) + param->reg.idx[count++].offset = 0; + +- assert(count < ARRAY_SIZE(param->reg.idx)); ++ VKD3D_ASSERT(count < ARRAY_SIZE(param->reg.idx)); + param->reg.idx[count++].offset = i; + param->reg.idx_count = count; + } +@@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record + code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); + } + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, +@@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co + return VKD3DSIH_IMAX; + case DX_IMIN: + return VKD3DSIH_IMIN; ++ case DX_QUAD_READ_LANE_AT: ++ return VKD3DSIH_QUAD_READ_LANE_AT; + case DX_UMAX: + return VKD3DSIH_UMAX; + case DX_UMIN: +@@ -4855,10 +4866,10 @@ static void 
sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr + return; + src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); + register_index_address_init(&src_param->reg.idx[2], operands[1], sm6); +- assert(src_param->reg.idx_count == 3); ++ VKD3D_ASSERT(src_param->reg.idx_count == 3); + + type = sm6_type_get_scalar_type(dst->type, 0); +- assert(type); ++ VKD3D_ASSERT(type); + src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type); + if (data_type_is_64_bit(src_param->reg.data_type)) + src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); +@@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int + reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); + + /* NOP is used to flag no instruction emitted. */ +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, +@@ -5331,7 +5342,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin + + if (!is_patch_constant && !operands[3]->is_undefined) + { +- assert(src_param->reg.idx_count > count); ++ VKD3D_ASSERT(src_param->reg.idx_count > count); + register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); + } + +@@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr + sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); + } + ++static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) ++{ ++ switch (op) ++ { ++ case QUAD_READ_ACROSS_X: ++ return VKD3DSIH_QUAD_READ_ACROSS_X; ++ case QUAD_READ_ACROSS_Y: ++ return VKD3DSIH_QUAD_READ_ACROSS_Y; ++ case QUAD_READ_ACROSS_D: ++ return VKD3DSIH_QUAD_READ_ACROSS_D; ++ default: ++ return VKD3DSIH_INVALID; ++ } ++} ++ ++static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, ++ const struct sm6_value **operands, struct function_emission_state *state) ++{ ++ struct vkd3d_shader_instruction *ins = state->ins; ++ struct vkd3d_shader_src_param *src_param; ++ enum vkd3d_shader_opcode opcode; ++ enum dxil_quad_op_kind quad_op; ++ ++ quad_op = sm6_value_get_constant_uint(operands[1]); ++ if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) ++ { ++ FIXME("Unhandled quad op kind %u.\n", quad_op); ++ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, ++ "Quad op kind %u is unhandled.", quad_op); ++ return; ++ } ++ ++ vsir_instruction_init(ins, &sm6->p.location, opcode); ++ ++ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) ++ return; ++ src_param_init_from_value(src_param, operands[0]); ++ ++ instruction_dst_param_init_ssa_scalar(ins, sm6); ++} ++ + static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) + { +@@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = + [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, + [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, + [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, ++ [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, ++ [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, + [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, + [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", 
sm6_parser_emit_dx_raw_buffer_store}, + [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, +@@ -6346,7 +6400,7 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ + + info = &sm6_dx_op_table[op]; + +- assert(info->ret_type[0]); ++ VKD3D_ASSERT(info->ret_type[0]); + if (!sm6_parser_validate_operand_type(sm6, dst, info->ret_type[0], NULL, true)) + { + WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); +@@ -6381,7 +6435,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade + { + const struct sm6_type *type; + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + + if (!dst->type) + return; +@@ -6551,7 +6605,7 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ + else if (to->u.width > from->u.width) + { + op = (code == CAST_ZEXT) ? VKD3DSIH_UTOU : VKD3DSIH_ITOI; +- assert(from->u.width == 1 || to->u.width == 64); ++ VKD3D_ASSERT(from->u.width == 1 || to->u.width == 64); + is_valid = from_int && to_int; + } + break; +@@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor + { + *dst = *value; + dst->type = type; +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + return; + } + +@@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor + * do not otherwise occur, so deleting these avoids the need for backend support. */ + if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) + { +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + *dst = *a; + return; + } +@@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record + reg->idx_count = 2; + dst->structure_stride = src->structure_stride; + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -7087,7 +7141,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor + + if (ptr->structure_stride) + { +- assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); + + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) +@@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record + incoming[j].block = sm6_function_get_block(function, record->operands[i + 1], sm6); + } + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + + qsort(incoming, phi->incoming_count, sizeof(*incoming), phi_incoming_compare); + +@@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record + + code_block->terminator.type = TERMINATOR_RET; + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -7270,7 +7324,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco + + if (ptr->structure_stride) + { +- assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); ++ VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); + + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) +@@ -7326,7 +7380,7 @@ static void sm6_parser_emit_switch(struct sm6_parser 
*sm6, const struct dxil_rec + if (!(src = sm6_parser_get_value_by_ref(sm6, record, type, &i)) + || !sm6_value_validate_is_register(src, sm6)) + return; +- assert(i == 2); ++ VKD3D_ASSERT(i == 2); + + if (src->type != type) + { +@@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec + terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); + } + +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + } + + static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, +@@ -7636,7 +7690,7 @@ static void metadata_attachment_record_apply(const struct dxil_record *record, e + "Ignoring a nested metadata attachment."); + } + +- assert(record->operand_count & 1); ++ VKD3D_ASSERT(record->operand_count & 1); + for (i = 1; i < record->operand_count; i += 2) + { + if (!(m = sm6_parser_find_metadata_kind(sm6, record->operands[i]))) +@@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + } + + ins = &code_block->instructions[code_block->instruction_count]; +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + + dst = sm6_parser_get_current_value(sm6); + fwd_type = dst->type; +@@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + + if (sm6->p.failed) + return VKD3D_ERROR; +- assert(ins->handler_idx != VKD3DSIH_INVALID); + + if (record->attachment) + metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); +@@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const + code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; + } + if (code_block) +- code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; +- else +- assert(ins->handler_idx == VKD3DSIH_NOP); ++ code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; + + if (dst->type && fwd_type && dst->type != fwd_type) + { +@@ -8002,7 +8053,7 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ + switch_case = &block->terminator.cases[i]; + if (!(case_block = switch_case->block)) + { +- assert(sm6->p.failed); ++ VKD3D_ASSERT(sm6->p.failed); + continue; + } + if (switch_case->is_default) +@@ -8071,7 +8122,7 @@ static void sm6_block_emit_phi(const struct sm6_block *block, struct sm6_parser + if (incoming_block) + vsir_src_param_init_label(&src_params[index + 1], incoming_block->id); + else +- assert(sm6->p.failed); ++ VKD3D_ASSERT(sm6->p.failed); + } + + dst_param_init(dst_param); +@@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc + + if (!m) + { +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ++ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->declaration.raw_resource.resource.reg.write_mask = 0; + return &ins->declaration.raw_resource.resource; + } +@@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc + "A typed resource has no data type."); + } + +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; ++ ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; + ins->declaration.semantic.resource_type = resource_type; +@@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc + } + else if (kind == RESOURCE_KIND_RAWBUFFER) + { +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ++ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->declaration.raw_resource.resource.reg.write_mask = 0; + + return &ins->declaration.raw_resource.resource; + } + else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) + { +- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; ++ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; + ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; + ins->declaration.structured_resource.resource.reg.write_mask = 0; + +@@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, + d->kind = kind; + d->reg_type = VKD3DSPR_RESOURCE; + d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; +- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) ++ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) + ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; + + init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); +@@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, + d->kind = values[0]; + d->reg_type = VKD3DSPR_UAV; + d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; +- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) ++ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) + ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; + + init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); +@@ -10155,12 +10206,13 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 + return NULL; + } + +-static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, ++static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, ++ const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) + { + size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; ++ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + struct shader_signature *patch_constant_signature, *output_signature, *input_signature; +- const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + const uint32_t *byte_code = dxbc_desc->byte_code; + unsigned int chunk_offset, chunk_size; +@@ -10251,9 +10303,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro + + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ + count = max(token_count, 400) - 400; +- if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) ++ if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); ++ vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + +@@ -10489,7 +10541,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co + uint32_t *byte_code = NULL; + int ret; + +- ERR("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); ++ MESSAGE("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + dxbc_desc.is_dxil = true; + if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, +@@ -10514,7 +10566,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co + dxbc_desc.byte_code = byte_code; + } + +- ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); ++ ret = sm6_parser_init(&sm6, program, compile_info, message_context, &dxbc_desc); + free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(byte_code); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 57b4ac24212..e3ebbafb3f4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -56,6 +56,114 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) + vkd3d_free(string_entry); + } + ++struct function_component ++{ ++ const char *name; ++ bool lhs_has_index; ++ unsigned int lhs_index; ++}; ++ ++static const struct state_block_function_info ++{ ++ const char *name; ++ unsigned int min_args, max_args; ++ const struct function_component components[3]; ++ unsigned int min_profile; ++} ++function_info[] = ++{ ++ {"SetBlendState", 3, 3, { { "AB_BlendFactor" }, { "AB_SampleMask" }, { "BlendState" } }, 4 }, ++ {"SetDepthStencilState", 2, 2, { { "DS_StencilRef" }, { "DepthStencilState" } }, 4 }, ++ {"SetRasterizerState", 1, 1, { { "RasterizerState" } }, 4 }, ++ {"SetVertexShader", 1, 1, { { "VertexShader" } }, 4 }, ++ {"SetDomainShader", 1, 1, { { "DomainShader" } }, 5 }, ++ {"SetHullShader", 1, 1, { { "HullShader" } }, 5 }, ++ {"SetGeometryShader", 1, 1, { { "GeometryShader" } }, 4 }, ++ {"SetPixelShader", 1, 1, { { "PixelShader" } }, 4 }, ++ {"SetComputeShader", 1, 1, { { "ComputeShader" } }, 4 }, ++ {"OMSetRenderTargets", 2, 9, { {0} }, 4 }, ++}; ++ ++static const struct state_block_function_info *get_state_block_function_info(const char *name) ++{ ++ for (unsigned int i = 0; i < ARRAY_SIZE(function_info); ++i) ++ { ++ if (!strcmp(name, function_info[i].name)) ++ return &function_info[i]; ++ } ++ return NULL; ++} ++ ++static void add_function_component(struct function_component **components, const char *name, ++ bool lhs_has_index, unsigned int lhs_index) ++{ ++ struct function_component *comp = *components; ++ ++ comp->name = name; ++ comp->lhs_has_index = lhs_has_index; ++ comp->lhs_index = lhs_index; ++ ++ *components = *components + 1; ++} ++ ++static void get_state_block_function_components(const struct state_block_function_info *info, ++ struct function_component *components, unsigned int comp_count) ++{ ++ unsigned int i; ++ ++ assert(comp_count <= info->max_args); ++ ++ if (info->min_args == info->max_args) ++ 
{ ++ const struct function_component *c = info->components; ++ for (i = 0; i < comp_count; ++i, ++c) ++ add_function_component(&components, c->name, c->lhs_has_index, c->lhs_index); ++ return; ++ } ++ ++ if (!strcmp(info->name, "OMSetRenderTargets")) ++ { ++ for (i = 0; i < comp_count - 2; ++i) ++ add_function_component(&components, "RenderTargetView", true, i + 1); ++ add_function_component(&components, "DepthStencilView", false, 0); ++ add_function_component(&components, "RenderTargetView", true, 0); ++ } ++} ++ ++bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, ++ const struct vkd3d_shader_location *loc) ++{ ++ if (entry->is_function_call) ++ { ++ const struct state_block_function_info *info = get_state_block_function_info(entry->name); ++ ++ if (!info) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid state block function '%s'.", entry->name); ++ return false; ++ } ++ if (entry->args_count < info->min_args || entry->args_count > info->max_args) ++ { ++ if (info->min_args == info->max_args) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid argument count for state block function '%s' (expected %u).", ++ entry->name, info->min_args); ++ } ++ else ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "Invalid argument count for state block function '%s' (expected from %u to %u).", ++ entry->name, info->min_args, info->max_args); ++ } ++ return false; ++ } ++ } ++ ++ return true; ++} ++ + struct fx_write_context; + + struct fx_write_context_ops +@@ -63,6 +171,7 @@ struct fx_write_context_ops + uint32_t (*write_string)(const char *string, struct fx_write_context *fx); + void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); ++ void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); + bool are_child_effects_supported; + }; + +@@ -94,6 +203,8 @@ struct fx_write_context + uint32_t texture_count; + uint32_t uav_count; + uint32_t sampler_state_count; ++ uint32_t depth_stencil_state_count; ++ uint32_t rasterizer_state_count; + int status; + + bool child_effect; +@@ -122,14 +233,46 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) + + static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) + { +- if (var->state_block_count) +- hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); +- + fx->ops->write_pass(var, fx); + } + ++static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) ++{ ++ struct hlsl_ctx *ctx = fx->ctx; ++ struct hlsl_ir_var *v; ++ uint32_t count = 0; ++ ++ if (!scope) ++ return 0; ++ ++ LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) ++ { ++ if (!v->default_values) ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Annotation variable is missing default value."); ++ ++ fx->ops->write_annotation(v, fx); ++ ++count; ++ } ++ ++ return count; ++} ++ ++static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t count_offset, count; ++ ++ count_offset = put_u32(buffer, 0); ++ count = write_annotations(scope, fx); ++ set_u32(buffer, count_offset, count); ++} ++ + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); + static const 
char * get_fx_4_type_name(const struct hlsl_type *type); ++static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); ++static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, ++ uint32_t count_offset, struct fx_write_context *fx); + + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) + { +@@ -138,7 +281,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context + unsigned int elements_count; + const char *name; + +- assert(fx->ctx->profile->major_version >= 4); ++ VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); + + if (type->class == HLSL_CLASS_ARRAY) + { +@@ -274,15 +417,14 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f + static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->structured; +- uint32_t name_offset; ++ uint32_t name_offset, count_offset; + + name_offset = write_string(var->name, fx); + put_u32(buffer, name_offset); +- put_u32(buffer, 0); /* Assignment count. */ +- put_u32(buffer, 0); /* Annotation count. */ ++ count_offset = put_u32(buffer, 0); + +- /* TODO: annotations */ +- /* TODO: assignments */ ++ write_fx_4_annotations(var->annotations, fx); ++ write_fx_4_state_block(var, 0, count_offset, fx); + } + + static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) +@@ -297,6 +439,12 @@ static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx + + /* TODO: annotations */ + /* TODO: assignments */ ++ ++ if (var->state_block_count && var->state_blocks[0]->count) ++ hlsl_fixme(fx->ctx, &var->loc, "Write pass assignments."); ++ ++ /* For some reason every pass adds to the total shader object count. */ ++ fx->shader_count++; + } + + static uint32_t get_fx_4_type_size(const struct hlsl_type *type) +@@ -402,6 +550,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + case HLSL_CLASS_UAV: + return uav_type_names[type->sampler_dim]; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ return "DepthStencilState"; ++ + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + return "DepthStencilView"; + +@@ -421,10 +572,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) + { ++ struct field_offsets ++ { ++ uint32_t name; ++ uint32_t semantic; ++ uint32_t offset; ++ uint32_t type; ++ }; ++ uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; +- uint32_t name_offset, offset, size, stride, numeric_desc; ++ struct field_offsets *field_offsets = NULL; ++ struct hlsl_ctx *ctx = fx->ctx; + uint32_t elements_count = 0; + const char *name; ++ size_t i; + + /* Resolve arrays to element type and number of elements. 
*/ + if (type->class == HLSL_CLASS_ARRAY) +@@ -436,6 +597,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + name = get_fx_4_type_name(type); + + name_offset = write_string(name, fx); ++ if (type->class == HLSL_CLASS_STRUCT) ++ { ++ if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) ++ return 0; ++ ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ const struct hlsl_struct_field *field = &type->e.record.fields[i]; ++ ++ field_offsets[i].name = write_string(field->name, fx); ++ field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); ++ field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; ++ field_offsets[i].type = write_type(field->type, fx); ++ } ++ } ++ + offset = put_u32_unaligned(buffer, name_offset); + + switch (type->class) +@@ -446,13 +623,19 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + put_u32_unaligned(buffer, 1); + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + put_u32_unaligned(buffer, 2); + break; + +@@ -464,6 +647,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_CONSTANT_BUFFER: + vkd3d_unreachable(); + + case HLSL_CLASS_STRING: +@@ -473,34 +657,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + return 0; + } + +- size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); ++ /* Structures can only contain numeric fields, this is validated during variable declaration. */ ++ total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); ++ packed_size = 0; ++ if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) ++ packed_size = hlsl_type_component_count(type) * sizeof(float); + if (elements_count) +- size *= elements_count; ++ { ++ total_size *= elements_count; ++ packed_size *= elements_count; ++ } + stride = align(stride, 4 * sizeof(float)); + + put_u32_unaligned(buffer, elements_count); +- put_u32_unaligned(buffer, size); /* Total size. */ +- put_u32_unaligned(buffer, stride); /* Stride. 
*/ +- put_u32_unaligned(buffer, size); ++ put_u32_unaligned(buffer, total_size); ++ put_u32_unaligned(buffer, stride); ++ put_u32_unaligned(buffer, packed_size); + + if (type->class == HLSL_CLASS_STRUCT) + { +- size_t i; +- + put_u32_unaligned(buffer, type->e.record.field_count); + for (i = 0; i < type->e.record.field_count; ++i) + { +- const struct hlsl_struct_field *field = &type->e.record.fields[i]; +- uint32_t semantic_offset, field_type_offset; ++ const struct field_offsets *field = &field_offsets[i]; + +- name_offset = write_string(field->name, fx); +- semantic_offset = write_string(field->semantic.name, fx); +- field_type_offset = write_type(field->type, fx); ++ put_u32_unaligned(buffer, field->name); ++ put_u32_unaligned(buffer, field->semantic); ++ put_u32_unaligned(buffer, field->offset); ++ put_u32_unaligned(buffer, field->type); ++ } + +- put_u32_unaligned(buffer, name_offset); +- put_u32_unaligned(buffer, semantic_offset); +- put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); +- put_u32_unaligned(buffer, field_type_offset); ++ if (ctx->profile->major_version == 5) ++ { ++ put_u32_unaligned(buffer, 0); /* Base class type */ ++ put_u32_unaligned(buffer, 0); /* Interface count */ + } + } + else if (type->class == HLSL_CLASS_TEXTURE) +@@ -556,18 +746,38 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + { + put_u32_unaligned(buffer, 6); + } ++ else if (type->class == HLSL_CLASS_RASTERIZER_STATE) ++ { ++ put_u32_unaligned(buffer, 4); ++ } ++ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) ++ { ++ put_u32_unaligned(buffer, 3); ++ } + else if (hlsl_is_numeric_type(type)) + { + numeric_desc = get_fx_4_numeric_type_description(type, fx); + put_u32_unaligned(buffer, numeric_desc); + } ++ else if (type->class == HLSL_CLASS_COMPUTE_SHADER) ++ { ++ put_u32_unaligned(buffer, 28); ++ } ++ else if (type->class == HLSL_CLASS_HULL_SHADER) ++ { ++ put_u32_unaligned(buffer, 29); ++ } ++ else if (type->class == HLSL_CLASS_DOMAIN_SHADER) ++ { ++ put_u32_unaligned(buffer, 30); ++ } + else + { + FIXME("Type %u is not supported.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); +- return 0; + } + ++ vkd3d_free(field_offsets); + return offset; + } + +@@ -581,8 +791,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex + name_offset = write_string(var->name, fx); + put_u32(buffer, name_offset); + count_offset = put_u32(buffer, 0); +- put_u32(buffer, 0); /* Annotation count. */ ++ write_fx_4_annotations(var->annotations, fx); + ++ count = 0; + LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) + { + write_pass(pass, fx); +@@ -617,7 +828,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) + + put_u32(buffer, name_offset); + count_offset = put_u32(buffer, 0); /* Technique count */ +- put_u32(buffer, 0); /* Annotation count */ ++ write_fx_4_annotations(var ? var->annotations : NULL, fx); + + count = fx->technique_count; + write_techniques(var ? var->scope : fx->ctx->globals, fx); +@@ -683,7 +894,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + } + + name_offset = write_string(name, fx); +- semantic_offset = write_string(semantic->name, fx); ++ semantic_offset = semantic->raw_name ? 
write_string(semantic->raw_name, fx) : 0; + + offset = put_u32(buffer, hlsl_sm1_base_type(type)); + put_u32(buffer, hlsl_sm1_class(type)); +@@ -705,6 +916,10 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + case HLSL_CLASS_STRUCT: + put_u32(buffer, type->e.record.field_count); + break; ++ case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_PIXEL_SHADER: ++ fx->shader_count += elements_count; ++ break; + default: + ; + } +@@ -716,7 +931,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + + /* Validated in check_invalid_object_fields(). */ +- assert(hlsl_is_numeric_type(field->type)); ++ VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); + write_fx_2_parameter(field->type, field->name, &field->semantic, fx); + } + } +@@ -794,6 +1009,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + /* FIXME: write actual initial value */ ++ if (var->default_values) ++ hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); ++ + offset = put_u32(buffer, 0); + + for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) +@@ -850,15 +1068,22 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); + return false; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_UAV: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + return false; + + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: ++ case HLSL_CLASS_CONSTANT_BUFFER: + /* This cannot appear as an extern variable. 
*/ + break; + } +@@ -910,7 +1135,7 @@ static const struct fx_write_context_ops fx_2_ops = + + static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + { +- uint32_t offset, size, technique_count, parameter_count, object_count; ++ uint32_t offset, size, technique_count, shader_count, parameter_count, object_count; + struct vkd3d_bytecode_buffer buffer = { 0 }; + struct vkd3d_bytecode_buffer *structured; + struct fx_write_context fx; +@@ -927,7 +1152,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + + parameter_count = put_u32(structured, 0); /* Parameter count */ + technique_count = put_u32(structured, 0); +- put_u32(structured, 0); /* Unknown */ ++ shader_count = put_u32(structured, 0); + object_count = put_u32(structured, 0); + + write_fx_2_parameters(&fx); +@@ -936,6 +1161,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + + write_techniques(ctx->globals, &fx); + set_u32(structured, technique_count, fx.technique_count); ++ set_u32(structured, shader_count, fx.shader_count); + + put_u32(structured, 0); /* String count */ + put_u32(structured, 0); /* Resource count */ +@@ -972,9 +1198,72 @@ static const struct fx_write_context_ops fx_4_ops = + .write_string = write_fx_4_string, + .write_technique = write_fx_4_technique, + .write_pass = write_fx_4_pass, ++ .write_annotation = write_fx_4_annotation, + .are_child_effects_supported = true, + }; + ++static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, ++ struct fx_write_context *fx) ++{ ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); ++ uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; ++ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; ++ struct hlsl_ctx *ctx = fx->ctx; ++ uint32_t offset = buffer->size; ++ unsigned int comp_count; ++ ++ if (!value) ++ return 0; ++ ++ comp_count = hlsl_type_component_count(type); ++ ++ for (i = 0; i < elements_count; ++i) ++ { ++ switch (type->class) ++ { ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_VECTOR: ++ case HLSL_CLASS_MATRIX: ++ { ++ switch (type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ case HLSL_TYPE_INT: ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ ++ for (j = 0; j < comp_count; ++j) ++ { ++ put_u32_unaligned(buffer, value->value.u); ++ value++; ++ } ++ break; ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", ++ type->e.numeric.type); ++ } ++ ++ break; ++ } ++ case HLSL_CLASS_STRUCT: ++ { ++ struct hlsl_struct_field *fields = type->e.record.fields; ++ ++ for (j = 0; j < type->e.record.field_count; ++j) ++ { ++ write_fx_4_default_value(fields[i].type, value, fx); ++ value += hlsl_type_component_count(fields[i].type); ++ } ++ break; ++ } ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); ++ } ++ } ++ ++ return offset; ++} ++ + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->structured; +@@ -984,22 +1273,20 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st + { + HAS_EXPLICIT_BIND_POINT = 0x4, + }; +- struct hlsl_ctx *ctx = fx->ctx; + +- /* Explicit bind point. 
*/ +- if (var->reg_reservation.reg_type) ++ if (var->has_explicit_bind_point) + flags |= HAS_EXPLICIT_BIND_POINT; + + type_offset = write_type(var->data_type, fx); + name_offset = write_string(var->name, fx); +- semantic_offset = write_string(var->semantic.name, fx); ++ semantic_offset = write_string(var->semantic.raw_name, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); + + semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ +- put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ +- value_offset = put_u32(buffer, 0); /* Default value offset */ ++ put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. */ ++ value_offset = put_u32(buffer, 0); + put_u32(buffer, flags); /* Flags */ + + if (shared) +@@ -1008,17 +1295,39 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st + } + else + { +- /* FIXME: write default value */ +- set_u32(buffer, value_offset, 0); ++ uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); ++ set_u32(buffer, value_offset, offset); + +- put_u32(buffer, 0); /* Annotations count */ +- if (has_annotations(var)) +- hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); ++ write_fx_4_annotations(var->annotations, fx); + + fx->numeric_variable_count++; + } + } + ++static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t name_offset, type_offset, offset; ++ struct hlsl_ctx *ctx = fx->ctx; ++ ++ name_offset = write_string(var->name, fx); ++ type_offset = write_type(var->data_type, fx); ++ ++ put_u32(buffer, name_offset); ++ put_u32(buffer, type_offset); ++ ++ if (hlsl_is_numeric_type(type)) ++ { ++ offset = write_fx_4_default_value(var->data_type, var->default_values, fx); ++ put_u32(buffer, offset); ++ } ++ else ++ { ++ hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); ++ } ++} ++ + struct rhs_named_value + { + const char *name; +@@ -1086,11 +1395,8 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_node *value = entry->args->node; + +- if (entry->lhs_has_index) +- hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); +- + put_u32(buffer, entry->name_id); +- put_u32(buffer, 0); /* TODO: destination index */ ++ put_u32(buffer, entry->lhs_index); + type_offset = put_u32(buffer, 0); + rhs_offset = put_u32(buffer, 0); + +@@ -1104,6 +1410,17 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl + assignment_type = 1; + break; + } ++ case HLSL_IR_LOAD: ++ { ++ struct hlsl_ir_load *l = hlsl_ir_load(value); ++ ++ if (l->src.path_len) ++ hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); ++ ++ value_offset = write_fx_4_string(l->src.var->name, fx); ++ assignment_type = 2; ++ break; ++ } + default: + hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); + } +@@ -1118,6 +1435,9 @@ static bool state_block_contains_state(const char *name, unsigned int start, str + + for (i = start; i < block->count; ++i) + { ++ if (block->entries[i]->is_function_call) ++ continue; ++ + if (!ascii_strcasecmp(block->entries[i]->name, name)) + return true; + } +@@ -1160,6 +1480,92 @@ static 
bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no + return true; + } + ++static void fold_state_value(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry) ++{ ++ bool progress; ++ ++ do ++ { ++ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); ++ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); ++ } while (progress); ++} ++ ++enum state_property_component_type ++{ ++ FX_BOOL, ++ FX_FLOAT, ++ FX_UINT, ++ FX_UINT8, ++ FX_DEPTHSTENCIL, ++ FX_RASTERIZER, ++ FX_DOMAINSHADER, ++ FX_HULLSHADER, ++ FX_COMPUTESHADER, ++ FX_TEXTURE, ++ FX_DEPTHSTENCILVIEW, ++ FX_RENDERTARGETVIEW, ++}; ++ ++static inline bool is_object_fx_type(enum state_property_component_type type) ++{ ++ switch (type) ++ { ++ case FX_DEPTHSTENCIL: ++ case FX_RASTERIZER: ++ case FX_DOMAINSHADER: ++ case FX_HULLSHADER: ++ case FX_COMPUTESHADER: ++ case FX_TEXTURE: ++ case FX_RENDERTARGETVIEW: ++ case FX_DEPTHSTENCILVIEW: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++static inline enum hlsl_type_class hlsl_type_class_from_fx_type(enum state_property_component_type type) ++{ ++ switch (type) ++ { ++ case FX_DEPTHSTENCIL: ++ return HLSL_CLASS_DEPTH_STENCIL_STATE; ++ case FX_RASTERIZER: ++ return HLSL_CLASS_RASTERIZER_STATE; ++ case FX_DOMAINSHADER: ++ return HLSL_CLASS_DOMAIN_SHADER; ++ case FX_HULLSHADER: ++ return HLSL_CLASS_HULL_SHADER; ++ case FX_COMPUTESHADER: ++ return HLSL_CLASS_COMPUTE_SHADER; ++ case FX_TEXTURE: ++ return HLSL_CLASS_TEXTURE; ++ case FX_RENDERTARGETVIEW: ++ return HLSL_CLASS_RENDER_TARGET_VIEW; ++ case FX_DEPTHSTENCILVIEW: ++ return HLSL_CLASS_DEPTH_STENCIL_VIEW; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) ++{ ++ switch (type) ++ { ++ case FX_BOOL: ++ return HLSL_TYPE_BOOL; ++ case FX_FLOAT: ++ return HLSL_TYPE_FLOAT; ++ case FX_UINT: ++ case FX_UINT8: ++ return HLSL_TYPE_UINT; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ + static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, + struct fx_write_context *fx) + { +@@ -1209,37 +1615,126 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + { NULL } + }; + ++ static const struct rhs_named_value depth_write_mask_values[] = ++ { ++ { "ZERO", 0 }, ++ { "ALL", 1 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value comparison_values[] = ++ { ++ { "NEVER", 1 }, ++ { "LESS", 2 }, ++ { "EQUAL", 3 }, ++ { "LESS_EQUAL", 4 }, ++ { "GREATER", 5 }, ++ { "NOT_EQUAL", 6 }, ++ { "GREATER_EQUAL", 7 }, ++ { "ALWAYS", 8 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value stencil_op_values[] = ++ { ++ { "KEEP", 1 }, ++ { "ZERO", 2 }, ++ { "REPLACE", 3 }, ++ { "INCR_SAT", 4 }, ++ { "DECR_SAT", 5 }, ++ { "INVERT", 6 }, ++ { "INCR", 7 }, ++ { "DECR", 8 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value fill_values[] = ++ { ++ { "WIREFRAME", 2 }, ++ { "SOLID", 3 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value cull_values[] = ++ { ++ { "NONE", 1 }, ++ { "FRONT", 2 }, ++ { "BACK", 3 }, ++ { NULL } ++ }; ++ + static const struct state + { + const char *name; + enum hlsl_type_class container; +- enum hlsl_base_type type; ++ enum hlsl_type_class class; ++ enum state_property_component_type type; + unsigned int dimx; ++ unsigned int array_size; + uint32_t id; + const struct rhs_named_value *values; + } + states[] = + { +- { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 
1, 45, filter_values }, +- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, +- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, +- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, +- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, +- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, +- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, +- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, +- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, +- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, +- /* TODO: "Texture" field */ ++ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, ++ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, ++ ++ { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, ++ { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, ++ ++ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, ++ ++ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, ++ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, ++ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14 }, ++ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, ++ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, ++ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, ++ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18 }, ++ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19 }, ++ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20 }, ++ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21 }, ++ ++ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22 }, ++ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, ++ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, ++ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25 }, ++ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, ++ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, ++ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, ++ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, ++ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, ++ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, ++ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, ++ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, ++ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, 
stencil_op_values }, ++ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, ++ ++ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, ++ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, ++ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, ++ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, ++ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, ++ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, ++ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, ++ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, ++ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, ++ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, ++ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55 }, ++ ++ { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, ++ { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, ++ { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, + }; + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct replace_state_context replace_context; ++ struct hlsl_type *state_type = NULL; + struct hlsl_ir_node *node, *cast; + const struct state *state = NULL; + struct hlsl_ctx *ctx = fx->ctx; +- struct hlsl_type *state_type; ++ enum hlsl_base_type base_type; ++ struct hlsl_ir_load *load; + unsigned int i; +- bool progress; + + for (i = 0; i < ARRAY_SIZE(states); ++i) + { +@@ -1264,69 +1759,240 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + return; + } + ++ if (entry->lhs_has_index && state->array_size == 1) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Can't use array-style access for non-array state %s.", ++ entry->name); ++ return; ++ } ++ ++ if (!entry->lhs_has_index && state->array_size > 1) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected array index for array state %s.", ++ entry->name); ++ return; ++ } ++ ++ if (entry->lhs_has_index && (state->array_size <= entry->lhs_index)) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid element index %u for the state %s[%u].", ++ entry->lhs_index, state->name, state->array_size); ++ return; ++ } ++ + entry->name_id = state->id; + + replace_context.values = state->values; + replace_context.var = var; + +- /* Turned named constants to actual constants. */ ++ /* Turn named constants to actual constants. */ + hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); ++ fold_state_value(ctx, entry); + +- if (state->dimx) +- state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); +- else +- state_type = hlsl_get_scalar_type(ctx, state->type); ++ /* Now cast and run folding again. 
*/ ++ ++ if (is_object_fx_type(state->type)) ++ { ++ node = entry->args->node; ++ ++ switch (node->type) ++ { ++ case HLSL_IR_LOAD: ++ load = hlsl_ir_load(node); ++ ++ if (load->src.path_len) ++ hlsl_fixme(ctx, &ctx->location, "Arrays are not supported for RHS."); ++ ++ if (load->src.var->data_type->class != hlsl_type_class_from_fx_type(state->type)) ++ { ++ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Type mismatch for the %s state value", ++ entry->name); ++ } ++ ++ break; ++ default: ++ hlsl_fixme(ctx, &ctx->location, "Unhandled node type for object-typed field."); ++ } + +- /* Cast to expected property type. */ +- node = entry->args->node; +- if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) + return; +- list_add_after(&node->entry, &cast->entry); ++ } + +- hlsl_src_remove(entry->args); +- hlsl_src_from_node(entry->args, cast); ++ base_type = hlsl_type_from_fx_type(state->type); ++ switch (state->class) ++ { ++ case HLSL_CLASS_VECTOR: ++ state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); ++ break; ++ case HLSL_CLASS_SCALAR: ++ state_type = hlsl_get_scalar_type(ctx, base_type); ++ break; ++ case HLSL_CLASS_TEXTURE: ++ hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); ++ break; ++ default: ++ ; ++ } + +- do ++ if (state_type) + { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); +- } while (progress); ++ node = entry->args->node; ++ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) ++ return; ++ list_add_after(&node->entry, &cast->entry); ++ ++ /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. */ ++ if (state->type == FX_UINT8) ++ { ++ struct hlsl_ir_node *mask; ++ ++ if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) ++ return; ++ list_add_after(&cast->entry, &mask->entry); ++ ++ if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) ++ return; ++ list_add_after(&mask->entry, &cast->entry); ++ } ++ ++ hlsl_src_remove(entry->args); ++ hlsl_src_from_node(entry->args, cast); ++ ++ fold_state_value(ctx, entry); ++ } + } + +-static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) ++static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct hlsl_state_block *block, ++ unsigned int entry_index, struct fx_write_context *fx) + { +- uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; +- struct vkd3d_bytecode_buffer *buffer = &fx->structured; +- uint32_t count_offset, count; ++ struct hlsl_state_block_entry *entry = block->entries[entry_index]; ++ const struct state_block_function_info *info; ++ struct function_component components[9]; ++ struct hlsl_ctx *ctx = fx->ctx; ++ unsigned int i; + +- for (i = 0; i < elements_count; ++i) ++ if (!entry->is_function_call) ++ return 1; ++ ++ if (!(info = get_state_block_function_info(entry->name))) ++ return 1; ++ ++ if (info->min_profile > ctx->profile->major_version) + { +- struct hlsl_state_block *block; ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, ++ "State %s is not supported for this profile.", entry->name); ++ return 1; ++ } + +- count_offset = put_u32(buffer, 0); ++ /* For single argument case simply replace the name. 
*/ ++ if (info->min_args == info->max_args && info->min_args == 1) ++ { ++ vkd3d_free(entry->name); ++ entry->name = hlsl_strdup(ctx, info->components[0].name); ++ return 1; ++ } + +- count = 0; +- if (var->state_blocks) ++ if (!vkd3d_array_reserve((void **)&block->entries, &block->capacity, block->count + entry->args_count - 1, ++ sizeof(*block->entries))) ++ return 1; ++ if (entry_index != block->count - 1) ++ { ++ memmove(&block->entries[entry_index + entry->args_count], &block->entries[entry_index + 1], ++ (block->count - entry_index - 1) * sizeof(*block->entries)); ++ } ++ block->count += entry->args_count - 1; ++ ++ get_state_block_function_components(info, components, entry->args_count); ++ ++ for (i = 0; i < entry->args_count; ++i) ++ { ++ const struct function_component *comp = &components[i]; ++ unsigned int arg_index = (i + 1) % entry->args_count; ++ block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, ++ comp->lhs_has_index, comp->lhs_index, arg_index); ++ } ++ hlsl_free_state_block_entry(entry); ++ ++ return entry->args_count; ++} ++ ++static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, ++ uint32_t count_offset, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ struct hlsl_state_block *block; ++ uint32_t i, count = 0; ++ ++ if (var->state_blocks) ++ { ++ block = var->state_blocks[block_index]; ++ ++ for (i = 0; i < block->count;) + { +- block = var->state_blocks[i]; ++ i += decompose_fx_4_state_block(var, block, i, fx); ++ } + +- for (j = 0; j < block->count; ++j) +- { +- struct hlsl_state_block_entry *entry = block->entries[j]; ++ for (i = 0; i < block->count; ++i) ++ { ++ struct hlsl_state_block_entry *entry = block->entries[i]; + +- /* Skip if property is reassigned later. This will use the last assignment. */ +- if (state_block_contains_state(entry->name, j + 1, block)) +- continue; ++ /* Skip if property is reassigned later. This will use the last assignment. */ ++ if (state_block_contains_state(entry->name, i + 1, block)) ++ continue; + +- /* Resolve special constant names and property names. */ +- resolve_fx_4_state_block_values(var, entry, fx); ++ /* Resolve special constant names and property names. */ ++ resolve_fx_4_state_block_values(var, entry, fx); + +- write_fx_4_state_assignment(var, entry, fx); +- ++count; +- } ++ write_fx_4_state_assignment(var, entry, fx); ++ ++count; + } ++ } ++ ++ set_u32(buffer, count_offset, count); ++} ++ ++static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t count_offset; + +- set_u32(buffer, count_offset, count); ++ for (i = 0; i < elements_count; ++i) ++ { ++ count_offset = put_u32(buffer, 0); ++ ++ write_fx_4_state_block(var, i, count_offset, fx); ++ } ++} ++ ++static void write_fx_4_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); ++ unsigned int i; ++ ++ /* FIXME: write shader blobs, once parser support works. 
*/ ++ for (i = 0; i < elements_count; ++i) ++ put_u32(buffer, 0); ++} ++ ++static void write_fx_5_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); ++ unsigned int i; ++ ++ /* FIXME: write shader blobs, once parser support works. */ ++ for (i = 0; i < elements_count; ++i) ++ { ++ put_u32(buffer, 0); /* Blob offset */ ++ put_u32(buffer, 0); /* SODecl[0] offset */ ++ put_u32(buffer, 0); /* SODecl[1] offset */ ++ put_u32(buffer, 0); /* SODecl[2] offset */ ++ put_u32(buffer, 0); /* SODecl[3] offset */ ++ put_u32(buffer, 0); /* SODecl count */ ++ put_u32(buffer, 0); /* Rasterizer stream */ ++ put_u32(buffer, 0); /* Interface bindings count */ ++ put_u32(buffer, 0); /* Interface initializer offset */ + } + } + +@@ -1336,7 +2002,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t semantic_offset, bind_point = ~0u; +- uint32_t name_offset, type_offset, i; ++ uint32_t name_offset, type_offset; + struct hlsl_ctx *ctx = fx->ctx; + + if (var->reg_reservation.reg_type) +@@ -1344,7 +2010,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + + type_offset = write_type(var->data_type, fx); + name_offset = write_string(var->name, fx); +- semantic_offset = write_string(var->semantic.name, fx); ++ semantic_offset = write_string(var->semantic.raw_name, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); +@@ -1373,9 +2039,14 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_VERTEX_SHADER: +- /* FIXME: write shader blobs, once parser support works. 
*/ +- for (i = 0; i < elements_count; ++i) +- put_u32(buffer, 0); ++ write_fx_4_shader_initializer(var, fx); ++ fx->shader_count += elements_count; ++ break; ++ ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ write_fx_5_shader_initializer(var, fx); + fx->shader_count += elements_count; + break; + +@@ -1383,19 +2054,27 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + fx->dsv_count += elements_count; + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ write_fx_4_state_object_initializer(var, fx); ++ fx->depth_stencil_state_count += elements_count; ++ break; ++ + case HLSL_CLASS_SAMPLER: + write_fx_4_state_object_initializer(var, fx); + fx->sampler_state_count += elements_count; + break; + ++ case HLSL_CLASS_RASTERIZER_STATE: ++ write_fx_4_state_object_initializer(var, fx); ++ fx->rasterizer_state_count += elements_count; ++ break; ++ + default: +- hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", +- type->e.numeric.type); ++ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object class %u is not implemented.", ++ type->class); + } + +- put_u32(buffer, 0); /* Annotations count */ +- if (has_annotations(var)) +- hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); ++ write_fx_4_annotations(var->annotations, fx); + + ++fx->object_variable_count; + } +@@ -1438,9 +2117,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + } + else + { +- put_u32(buffer, 0); /* Annotations count */ +- if (b->annotations) +- hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); ++ write_fx_4_annotations(b->annotations, fx); + ++fx->buffer_count; + } + +@@ -1464,6 +2141,9 @@ static void write_buffers(struct fx_write_context *fx, bool shared) + { + struct hlsl_buffer *buffer; + ++ if (shared && !fx->child_effect) ++ return; ++ + LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->size && !fx->include_empty_buffers) +@@ -1483,12 +2163,20 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc + + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: + return true; ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ if (ctx->profile->major_version < 5) ++ return false; ++ return true; + case HLSL_CLASS_UAV: + if (ctx->profile->major_version < 5) + return false; +@@ -1551,9 +2239,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ + put_u32(&buffer, 0); /* String count. */ + put_u32(&buffer, fx.texture_count); +- put_u32(&buffer, 0); /* Depth stencil state count. */ ++ put_u32(&buffer, fx.depth_stencil_state_count); + put_u32(&buffer, 0); /* Blend state count. */ +- put_u32(&buffer, 0); /* Rasterizer state count. */ ++ put_u32(&buffer, fx.rasterizer_state_count); + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); + put_u32(&buffer, fx.dsv_count); +@@ -1609,9 +2297,9 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + size_offset = put_u32(&buffer, 0); /* Unstructured size. 
*/ + put_u32(&buffer, 0); /* String count. */ + put_u32(&buffer, fx.texture_count); +- put_u32(&buffer, 0); /* Depth stencil state count. */ ++ put_u32(&buffer, fx.depth_stencil_state_count); + put_u32(&buffer, 0); /* Blend state count. */ +- put_u32(&buffer, 0); /* Rasterizer state count. */ ++ put_u32(&buffer, fx.rasterizer_state_count); + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); + put_u32(&buffer, fx.dsv_count); +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 3e482a5fc70..10e12ea56f2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -18,10 +18,23 @@ + + #include "vkd3d_shader_private.h" + ++struct glsl_src ++{ ++ struct vkd3d_string_buffer *str; ++}; ++ ++struct glsl_dst ++{ ++ const struct vkd3d_shader_dst_param *vsir; ++ struct vkd3d_string_buffer *register_name; ++ struct vkd3d_string_buffer *mask; ++}; ++ + struct vkd3d_glsl_generator + { + struct vsir_program *program; +- struct vkd3d_string_buffer buffer; ++ struct vkd3d_string_buffer_cache string_buffers; ++ struct vkd3d_string_buffer *buffer; + struct vkd3d_shader_location location; + struct vkd3d_shader_message_context *message_context; + unsigned int indent; +@@ -45,18 +58,149 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne + vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); + } + ++static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, ++ struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) ++{ ++ switch (reg->type) ++ { ++ case VKD3DSPR_TEMP: ++ vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); ++ break; ++ ++ default: ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled register type %#x.", reg->type); ++ vkd3d_string_buffer_printf(buffer, "", reg->type); ++ break; ++ } ++} ++ ++static void shader_glsl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask) ++{ ++ const char swizzle_chars[] = "xyzw"; ++ unsigned int i; ++ ++ vkd3d_string_buffer_printf(buffer, "."); ++ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) ++ { ++ if (mask & (VKD3DSP_WRITEMASK_0 << i)) ++ vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); ++ } ++} ++ ++static void shader_glsl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) ++{ ++ vkd3d_string_buffer_printf(buffer, "."); ++ if (write_mask & VKD3DSP_WRITEMASK_0) ++ vkd3d_string_buffer_printf(buffer, "x"); ++ if (write_mask & VKD3DSP_WRITEMASK_1) ++ vkd3d_string_buffer_printf(buffer, "y"); ++ if (write_mask & VKD3DSP_WRITEMASK_2) ++ vkd3d_string_buffer_printf(buffer, "z"); ++ if (write_mask & VKD3DSP_WRITEMASK_3) ++ vkd3d_string_buffer_printf(buffer, "w"); ++} ++ ++static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_cache *cache) ++{ ++ vkd3d_string_buffer_release(cache, src->str); ++} ++ ++static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) ++{ ++ const struct vkd3d_shader_register *reg = &vsir_src->reg; ++ ++ glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ if (reg->non_uniform) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled 'non-uniform' modifer."); ++ if (vsir_src->modifiers) ++ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); ++ ++ shader_glsl_print_register_name(glsl_src->str, gen, reg); ++ if (reg->dimension == VSIR_DIMENSION_VEC4) ++ shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); ++} ++ ++static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) ++{ ++ vkd3d_string_buffer_release(cache, dst->mask); ++ vkd3d_string_buffer_release(cache, dst->register_name); ++} ++ ++static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) ++{ ++ uint32_t write_mask = vsir_dst->write_mask; ++ ++ if (ins->flags & VKD3DSI_PRECISE_XYZW) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled 'precise' modifer."); ++ if (vsir_dst->reg.non_uniform) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled 'non-uniform' modifer."); ++ ++ glsl_dst->vsir = vsir_dst; ++ glsl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); ++ glsl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); ++ ++ shader_glsl_print_register_name(glsl_dst->register_name, gen, &vsir_dst->reg); ++ shader_glsl_print_write_mask(glsl_dst->mask, write_mask); ++ ++ return write_mask; ++} ++ ++static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( ++ struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) ++{ ++ va_list args; ++ ++ if (dst->vsir->shift) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); ++ if (dst->vsir->modifiers) ++ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, ++ "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); ++ ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); ++ ++ va_start(args, format); ++ vkd3d_string_buffer_vprintf(gen->buffer, format, args); ++ va_end(args); ++ ++ vkd3d_string_buffer_printf(gen->buffer, ";\n"); ++} ++ + static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) + { +- shader_glsl_print_indent(&gen->buffer, gen->indent); +- vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "/* */\n", ins->opcode); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); ++ "Internal compiler error: Unhandled instruction %#x.", ins->opcode); + } + +-static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, +- const struct vkd3d_shader_instruction *ins) ++static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) ++{ ++ struct glsl_src src; ++ struct glsl_dst dst; ++ uint32_t mask; ++ ++ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); ++ glsl_src_init(&src, gen, &ins->src[0], mask); ++ ++ shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); ++ ++ glsl_src_cleanup(&src, &gen->string_buffers); ++ glsl_dst_cleanup(&dst, &gen->string_buffers); ++} ++ ++static void shader_glsl_ret(struct vkd3d_glsl_generator 
*gen, const struct vkd3d_shader_instruction *ins) + { +- const struct vkd3d_shader_version *version = &generator->program->shader_version; ++ const struct vkd3d_shader_version *version = &gen->program->shader_version; + + /* + * TODO: Implement in_subroutine +@@ -64,45 +208,59 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, + */ + if (version->major >= 4) + { +- shader_glsl_print_indent(&generator->buffer, generator->indent); +- vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); ++ shader_glsl_print_indent(gen->buffer, gen->indent); ++ vkd3d_string_buffer_printf(gen->buffer, "return;\n"); + } + } + +-static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, +- const struct vkd3d_shader_instruction *instruction) ++static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, ++ const struct vkd3d_shader_instruction *ins) + { +- generator->location = instruction->location; ++ gen->location = ins->location; + +- switch (instruction->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_INPUT: + case VKD3DSIH_DCL_OUTPUT: + case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_NOP: + break; ++ case VKD3DSIH_MOV: ++ shader_glsl_mov(gen, ins); ++ break; + case VKD3DSIH_RET: +- shader_glsl_ret(generator, instruction); ++ shader_glsl_ret(gen, ins); + break; + default: +- shader_glsl_unhandled(generator, instruction); ++ shader_glsl_unhandled(gen, ins); + break; + } + } + ++static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) ++{ ++ const struct vsir_program *program = gen->program; ++ struct vkd3d_string_buffer *buffer = gen->buffer; ++ ++ if (program->temp_count) ++ vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); ++} ++ + static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) + { + const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; +- struct vkd3d_string_buffer *buffer = &gen->buffer; ++ struct vkd3d_string_buffer *buffer = gen->buffer; + unsigned int i; + void *code; + +- ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); ++ MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + + vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); + + vkd3d_string_buffer_printf(buffer, "/* Generated by %s. 
*/\n\n", vkd3d_shader_get_version(NULL, NULL)); + ++ shader_glsl_generate_declarations(gen); ++ + vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); + + ++gen->indent; +@@ -132,7 +290,8 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc + + static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) + { +- vkd3d_string_buffer_cleanup(&gen->buffer); ++ vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); ++ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); + } + + static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, +@@ -140,7 +299,8 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + { + memset(gen, 0, sizeof(*gen)); + gen->program = program; +- vkd3d_string_buffer_init(&gen->buffer); ++ vkd3d_string_buffer_cache_init(&gen->string_buffers); ++ gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); + gen->message_context = message_context; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 99214fba6de..7f85195382d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -134,7 +134,7 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) + return hlsl_get_var(scope->upper, name); + } + +-static void free_state_block_entry(struct hlsl_state_block_entry *entry) ++void hlsl_free_state_block_entry(struct hlsl_state_block_entry *entry) + { + unsigned int i; + +@@ -151,9 +151,9 @@ void hlsl_free_state_block(struct hlsl_state_block *state_block) + { + unsigned int k; + +- assert(state_block); ++ VKD3D_ASSERT(state_block); + for (k = 0; k < state_block->count; ++k) +- free_state_block_entry(state_block->entries[k]); ++ hlsl_free_state_block_entry(state_block->entries[k]); + vkd3d_free(state_block->entries); + vkd3d_free(state_block); + } +@@ -167,6 +167,8 @@ void hlsl_free_var(struct hlsl_ir_var *decl) + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); + ++ vkd3d_free(decl->default_values); ++ + for (i = 0; i < decl->state_block_count; ++i) + hlsl_free_state_block(decl->state_blocks[i]); + vkd3d_free(decl->state_blocks); +@@ -367,15 +369,22 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + type->reg_size[HLSL_REGSET_UAVS] = 1; + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + break; + } + } +@@ -435,21 +444,28 @@ static bool type_is_single_component(const struct hlsl_type *type) + { + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + return true; + + case 
HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_CONSTANT_BUFFER: + return false; + + case HLSL_CLASS_EFFECT_GROUP: +@@ -474,13 +490,13 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + struct hlsl_type *type = *type_ptr; + unsigned int index = *index_ptr; + +- assert(!type_is_single_component(type)); +- assert(index < hlsl_type_component_count(type)); ++ VKD3D_ASSERT(!type_is_single_component(type)); ++ VKD3D_ASSERT(index < hlsl_type_component_count(type)); + + switch (type->class) + { + case HLSL_CLASS_VECTOR: +- assert(index < type->dimx); ++ VKD3D_ASSERT(index < type->dimx); + *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); + *index_ptr = 0; + return index; +@@ -490,7 +506,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + unsigned int y = index / type->dimx, x = index % type->dimx; + bool row_major = hlsl_type_is_row_major(type); + +- assert(index < type->dimx * type->dimy); ++ VKD3D_ASSERT(index < type->dimx * type->dimy); + *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); + *index_ptr = row_major ? x : y; + return row_major ? y : x; +@@ -504,7 +520,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + *type_ptr = type->e.array.type; + *index_ptr = index % elem_comp_count; + array_index = index / elem_comp_count; +- assert(array_index < type->e.array.elements_count); ++ VKD3D_ASSERT(array_index < type->e.array.elements_count); + return array_index; + } + +@@ -528,6 +544,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, + vkd3d_unreachable(); + } + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ { ++ *type_ptr = type->e.resource.format; ++ return traverse_path_from_component_index(ctx, type_ptr, index_ptr); ++ } ++ + default: + vkd3d_unreachable(); + } +@@ -556,12 +578,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + + switch (type->class) + { +- case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +- case HLSL_CLASS_MATRIX: + offset[HLSL_REGSET_NUMERIC] += idx; + break; + ++ case HLSL_CLASS_MATRIX: ++ offset[HLSL_REGSET_NUMERIC] += 4 * idx; ++ break; ++ + case HLSL_CLASS_STRUCT: + for (r = 0; r <= HLSL_REGSET_LAST; ++r) + offset[r] += type->e.record.fields[idx].reg_offset[r]; +@@ -577,21 +601,29 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + } + break; + ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: +- assert(idx == 0); ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: ++ VKD3D_ASSERT(idx == 0); + break; + + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_SCALAR: ++ case HLSL_CLASS_CONSTANT_BUFFER: + vkd3d_unreachable(); + } + type = next_type; +@@ -638,9 +670,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d + deref->rel_offset.node = NULL; + deref->const_offset = 0; + +- assert(chain); ++ VKD3D_ASSERT(chain); + if (chain->type == HLSL_IR_INDEX) +- 
assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); ++ VKD3D_ASSERT(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); + + /* Find the length of the index chain */ + chain_len = 0; +@@ -687,7 +719,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d + chain_len++; + ptr = index->val.node; + } +- assert(deref->path_len == load->src.path_len + chain_len); ++ VKD3D_ASSERT(deref->path_len == load->src.path_len + chain_len); + + return true; + } +@@ -697,7 +729,7 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de + struct hlsl_type *type; + unsigned int i; + +- assert(deref); ++ VKD3D_ASSERT(deref); + + if (hlsl_deref_is_lowered(deref)) + return deref->data_type; +@@ -752,7 +784,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl + hlsl_src_from_node(&deref->path[deref_path_len++], c); + } + +- assert(deref_path_len == deref->path_len); ++ VKD3D_ASSERT(deref_path_len == deref->path_len); + + return true; + } +@@ -760,7 +792,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl + struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, const struct hlsl_type *type, + struct hlsl_ir_node *idx) + { +- assert(idx); ++ VKD3D_ASSERT(idx); + + switch (type->class) + { +@@ -780,7 +812,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co + { + struct hlsl_ir_constant *c = hlsl_ir_constant(idx); + +- assert(c->value.u[0].u < type->e.record.field_count); ++ VKD3D_ASSERT(c->value.u[0].u < type->e.record.field_count); + return type->e.record.fields[c->value.u[0].u].type; + } + +@@ -865,6 +897,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim + return type; + } + ++struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) ++ return NULL; ++ type->class = HLSL_CLASS_CONSTANT_BUFFER; ++ type->dimy = 1; ++ type->e.resource.format = format; ++ hlsl_type_calculate_reg_size(ctx, type); ++ list_add_tail(&ctx->types, &type->entry); ++ return type; ++} ++ + static const char * get_case_insensitive_typename(const char *name) + { + static const char *const names[] = +@@ -956,14 +1002,23 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_ARRAY: + return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ return hlsl_type_component_count(type->e.resource.format); ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + return 1; + + case HLSL_CLASS_EFFECT_GROUP: +@@ -1038,14 +1093,23 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + case HLSL_CLASS_TECHNIQUE: + return t1->e.version == t2->e.version; + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + 
case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_STRING: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + return true; + } + +@@ -1247,6 +1311,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha + list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); + else + list_add_tail(&ctx->globals->vars, &var->scope_entry); ++ var->is_synthetic = true; + } + return var; + } +@@ -1265,7 +1330,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc + if (!other) + return true; + +- assert(!hlsl_deref_is_lowered(other)); ++ VKD3D_ASSERT(!hlsl_deref_is_lowered(other)); + + if (!init_deref(ctx, deref, other->var, other->path_len)) + return false; +@@ -1322,8 +1387,8 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls + struct hlsl_ir_store *store; + unsigned int i; + +- assert(lhs); +- assert(!hlsl_deref_is_lowered(lhs)); ++ VKD3D_ASSERT(lhs); ++ VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); + + if (!(store = hlsl_alloc(ctx, sizeof(*store)))) + return NULL; +@@ -1394,7 +1459,7 @@ struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *t + { + struct hlsl_ir_constant *c; + +- assert(type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR); + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + return NULL; +@@ -1439,6 +1504,24 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n + return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); + } + ++struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, ++ const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_string_constant *s; ++ ++ if (!(s = hlsl_alloc(ctx, sizeof(*s)))) ++ return NULL; ++ ++ init_node(&s->node, HLSL_IR_STRING_CONSTANT, ctx->builtin_types.string, loc); ++ ++ if (!(s->string = hlsl_strdup(ctx, str))) ++ { ++ hlsl_free_instr(&s->node); ++ return NULL; ++ } ++ return &s->node; ++} ++ + struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], + struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) +@@ -1468,7 +1551,7 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; + +- assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); ++ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); + } + +@@ -1477,8 +1560,8 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; + +- assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); +- assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); ++ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); ++ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); + } + +@@ -1540,7 +1623,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl + struct hlsl_type *type; + unsigned int i; + +- 
assert(!hlsl_deref_is_lowered(deref)); ++ VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); + + type = hlsl_deref_get_type(ctx, deref); + if (idx) +@@ -1569,7 +1652,7 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls + /* This deref can only exists temporarily because it is not the real owner of its members. */ + struct hlsl_deref tmp_deref; + +- assert(deref->path_len >= 1); ++ VKD3D_ASSERT(deref->path_len >= 1); + + tmp_deref = *deref; + tmp_deref.path_len = deref->path_len - 1; +@@ -1674,7 +1757,7 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned + + if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) + return NULL; +- assert(hlsl_is_numeric_type(val->data_type)); ++ VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); + if (components == 1) + type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); + else +@@ -1765,7 +1848,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type + } + + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, const struct vkd3d_shader_location *loc) ++ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, ++ unsigned int unroll_limit, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_loop *loop; + +@@ -1774,6 +1858,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, + init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); + hlsl_block_init(&loop->body); + hlsl_block_add_block(&loop->body, block); ++ ++ loop->unroll_type = unroll_type; ++ loop->unroll_limit = unroll_limit; + return &loop->node; + } + +@@ -1836,9 +1923,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct + return map->instrs[i].dst; + } + +- /* The block passed to hlsl_clone_block() should have been free of external +- * references. 
*/ +- vkd3d_unreachable(); ++ return src; + } + + static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, +@@ -1846,7 +1931,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, + { + unsigned int i; + +- assert(!hlsl_deref_is_lowered(src)); ++ VKD3D_ASSERT(!hlsl_deref_is_lowered(src)); + + if (!init_deref(ctx, dst, src->var, src->path_len)) + return false; +@@ -1935,7 +2020,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ + if (!clone_block(ctx, &body, &src->body, map)) + return NULL; + +- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) ++ if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) + { + hlsl_block_cleanup(&body); + return NULL; +@@ -1992,6 +2077,11 @@ static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, + return &dst->node; + } + ++static struct hlsl_ir_node *clone_string_constant(struct hlsl_ctx *ctx, struct hlsl_ir_string_constant *src) ++{ ++ return hlsl_new_string_constant(ctx, src->string, &src->node.loc); ++} ++ + static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src) + { + struct hlsl_ir_store *dst; +@@ -2034,6 +2124,43 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, + return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); + } + ++struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, ++ struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, ++ unsigned int lhs_index, unsigned int arg_index) ++{ ++ struct hlsl_state_block_entry *entry; ++ struct clone_instr_map map = { 0 }; ++ ++ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) ++ return NULL; ++ entry->name = hlsl_strdup(ctx, name); ++ entry->lhs_has_index = lhs_has_index; ++ entry->lhs_index = lhs_index; ++ if (!(entry->instrs = hlsl_alloc(ctx, sizeof(*entry->instrs)))) ++ { ++ hlsl_free_state_block_entry(entry); ++ return NULL; ++ } ++ ++ entry->args_count = 1; ++ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) ++ { ++ hlsl_free_state_block_entry(entry); ++ return NULL; ++ } ++ ++ hlsl_block_init(entry->instrs); ++ if (!clone_block(ctx, entry->instrs, src->instrs, &map)) ++ { ++ hlsl_free_state_block_entry(entry); ++ return NULL; ++ } ++ clone_src(&map, entry->args, &src->args[arg_index]); ++ vkd3d_free(map.instrs); ++ ++ return entry; ++} ++ + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) + { + hlsl_block_cleanup(&c->body); +@@ -2121,6 +2248,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, + case HLSL_IR_RESOURCE_STORE: + return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr)); + ++ case HLSL_IR_STRING_CONSTANT: ++ return clone_string_constant(ctx, hlsl_ir_string_constant(instr)); ++ + case HLSL_IR_STORE: + return clone_store(ctx, map, hlsl_ir_store(instr)); + +@@ -2249,7 +2379,7 @@ void hlsl_pop_scope(struct hlsl_ctx *ctx) + { + struct hlsl_scope *prev_scope = ctx->cur_scope->upper; + +- assert(prev_scope); ++ VKD3D_ASSERT(prev_scope); + TRACE("Popping current scope.\n"); + ctx->cur_scope = prev_scope; + } +@@ -2327,17 +2457,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + switch (type->class) + { + case HLSL_CLASS_SCALAR: +- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); ++ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s", 
base_types[type->e.numeric.type]); + return string; + + case HLSL_CLASS_VECTOR: +- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); ++ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); + return string; + + case HLSL_CLASS_MATRIX: +- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); ++ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); + vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); + return string; + +@@ -2375,15 +2505,15 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + return string; + } + +- assert(hlsl_is_numeric_type(type->e.resource.format)); +- assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); ++ VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); ++ VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); + if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) + { + vkd3d_string_buffer_printf(string, "Buffer"); + } + else + { +- assert(type->sampler_dim < ARRAY_SIZE(dimensions)); ++ VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); + vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); + } + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) +@@ -2407,16 +2537,31 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + } + return string; + ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ vkd3d_string_buffer_printf(string, "ConstantBuffer"); ++ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) ++ { ++ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); ++ hlsl_release_string_buffer(ctx, inner_string); ++ } ++ return string; ++ ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + break; + } + +@@ -2513,19 +2658,21 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) + { + static const char * const names[] = + { +- [HLSL_IR_CALL ] = "HLSL_IR_CALL", +- [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", +- [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", +- [HLSL_IR_IF ] = "HLSL_IR_IF", +- [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", +- [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", +- [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", +- [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", +- [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", +- [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", +- [HLSL_IR_STORE ] = "HLSL_IR_STORE", +- [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", +- [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", ++ [HLSL_IR_CALL ] = "HLSL_IR_CALL", ++ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", ++ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", ++ [HLSL_IR_IF ] = "HLSL_IR_IF", ++ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", ++ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", ++ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", ++ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", ++ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", ++ [HLSL_IR_RESOURCE_STORE ] = "HLSL_IR_RESOURCE_STORE", ++ [HLSL_IR_STRING_CONSTANT] = "HLSL_IR_STRING_CONSTANT", ++ [HLSL_IR_STORE ] = 
"HLSL_IR_STORE", ++ [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", ++ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", ++ [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", + }; + + if (type >= ARRAY_SIZE(names)) +@@ -2544,7 +2691,7 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) + [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", + }; + +- assert(type < ARRAY_SIZE(names)); ++ VKD3D_ASSERT(type < ARRAY_SIZE(names)); + return names[type]; + } + +@@ -2634,7 +2781,7 @@ const char *debug_hlsl_writemask(unsigned int writemask) + char string[5]; + unsigned int i = 0, pos = 0; + +- assert(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); ++ VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); + + while (writemask) + { +@@ -2653,7 +2800,7 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) + char string[5]; + unsigned int i; + +- assert(size <= ARRAY_SIZE(components)); ++ VKD3D_ASSERT(size <= ARRAY_SIZE(components)); + for (i = 0; i < size; ++i) + string[i] = components[hlsl_swizzle_get_component(swizzle, i)]; + string[size] = 0; +@@ -2735,6 +2882,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + static const char *const op_names[] = + { + [HLSL_OP0_VOID] = "void", ++ [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", + + [HLSL_OP1_ABS] = "abs", + [HLSL_OP1_BIT_NOT] = "~", +@@ -2749,6 +2897,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP1_DSY_COARSE] = "dsy_coarse", + [HLSL_OP1_DSY_FINE] = "dsy_fine", + [HLSL_OP1_EXP2] = "exp2", ++ [HLSL_OP1_F16TOF32] = "f16tof32", + [HLSL_OP1_FLOOR] = "floor", + [HLSL_OP1_FRACT] = "fract", + [HLSL_OP1_LOG2] = "log2", +@@ -2790,6 +2939,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) + [HLSL_OP3_CMP] = "cmp", + [HLSL_OP3_DP2ADD] = "dp2add", + [HLSL_OP3_TERNARY] = "ternary", ++ [HLSL_OP3_MAD] = "mad", + }; + + return op_names[op]; +@@ -2875,7 +3025,7 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru + [HLSL_RESOURCE_RESINFO] = "resinfo", + }; + +- assert(load->load_type < ARRAY_SIZE(type_names)); ++ VKD3D_ASSERT(load->load_type < ARRAY_SIZE(type_names)); + vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[load->load_type]); + dump_deref(buffer, &load->resource); + vkd3d_string_buffer_printf(buffer, ", sampler = "); +@@ -2929,6 +3079,11 @@ static void dump_ir_resource_store(struct vkd3d_string_buffer *buffer, const str + vkd3d_string_buffer_printf(buffer, ")"); + } + ++static void dump_ir_string(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_string_constant *string) ++{ ++ vkd3d_string_buffer_printf(buffer, "\"%s\"", debugstr_a(string->string)); ++} ++ + static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) + { + vkd3d_string_buffer_printf(buffer, "= ("); +@@ -3048,6 +3203,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, + dump_ir_resource_store(buffer, hlsl_ir_resource_store(instr)); + break; + ++ case HLSL_IR_STRING_CONSTANT: ++ dump_ir_string(buffer, hlsl_ir_string_constant(instr)); ++ break; ++ + case HLSL_IR_STORE: + dump_ir_store(buffer, hlsl_ir_store(instr)); + break; +@@ -3086,12 +3245,39 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl + vkd3d_string_buffer_cleanup(&buffer); + } + ++void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) ++{ ++ unsigned int k, component_count = hlsl_type_component_count(var->data_type); ++ struct vkd3d_string_buffer buffer; ++ ++ 
vkd3d_string_buffer_init(&buffer); ++ if (!var->default_values) ++ { ++ vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); ++ vkd3d_string_buffer_trace(&buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++ return; ++ } ++ ++ vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); ++ for (k = 0; k < component_count; ++k) ++ { ++ if (k % 4 == 0) ++ vkd3d_string_buffer_printf(&buffer, "\n "); ++ vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].value.u); ++ } ++ vkd3d_string_buffer_printf(&buffer, "\n"); ++ ++ vkd3d_string_buffer_trace(&buffer); ++ vkd3d_string_buffer_cleanup(&buffer); ++} ++ + void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) + { + struct hlsl_src *src, *next; + +- assert(old->data_type->dimx == new->data_type->dimx); +- assert(old->data_type->dimy == new->data_type->dimy); ++ VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); ++ VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy); + + LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) + { +@@ -3199,6 +3385,12 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) + vkd3d_free(load); + } + ++static void free_ir_string_constant(struct hlsl_ir_string_constant *string) ++{ ++ vkd3d_free(string->string); ++ vkd3d_free(string); ++} ++ + static void free_ir_resource_store(struct hlsl_ir_resource_store *store) + { + hlsl_cleanup_deref(&store->resource); +@@ -3243,7 +3435,7 @@ static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *cons + + void hlsl_free_instr(struct hlsl_ir_node *node) + { +- assert(list_empty(&node->uses)); ++ VKD3D_ASSERT(list_empty(&node->uses)); + + switch (node->type) + { +@@ -3283,6 +3475,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) + free_ir_resource_load(hlsl_ir_resource_load(node)); + break; + ++ case HLSL_IR_STRING_CONSTANT: ++ free_ir_string_constant(hlsl_ir_string_constant(node)); ++ break; ++ + case HLSL_IR_RESOURCE_STORE: + free_ir_resource_store(hlsl_ir_resource_store(node)); + break; +@@ -3319,9 +3515,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) + void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) + { + vkd3d_free((void *)semantic->name); ++ vkd3d_free((void *)semantic->raw_name); + memset(semantic, 0, sizeof(*semantic)); + } + ++bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) ++{ ++ *dst = *src; ++ dst->name = dst->raw_name = NULL; ++ if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) ++ return false; ++ if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) ++ { ++ hlsl_cleanup_semantic(dst); ++ return false; ++ } ++ ++ return true; ++} ++ + static void free_function_decl(struct hlsl_ir_function_decl *decl) + { + unsigned int i; +@@ -3710,15 +3922,21 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + ctx->builtin_types.sampler[bt] = type; + } + ++ ctx->builtin_types.string = hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING); + ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); + hlsl_scope_add_type(ctx->globals, 
hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); +- hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "ComputeShader", HLSL_CLASS_COMPUTE_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DomainShader", HLSL_CLASS_DOMAIN_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "HullShader", HLSL_CLASS_HULL_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "GeometryShader", HLSL_CLASS_GEOMETRY_SHADER)); + + for (i = 0; i < ARRAY_SIZE(effect_types); ++i) + { +@@ -4049,6 +4267,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct + /* Save and restore everything that matters. + * Note that saving the scope stack is hard, and shouldn't be necessary. */ + ++ hlsl_push_scope(ctx); + ctx->scanner = NULL; + ctx->internal_func_name = internal_name->buffer; + ctx->cur_function = NULL; +@@ -4056,6 +4275,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct + ctx->scanner = saved_scanner; + ctx->internal_func_name = saved_internal_func_name; + ctx->cur_function = saved_cur_function; ++ hlsl_pop_scope(ctx); + if (ret) + { + ERR("Failed to compile intrinsic, error %u.\n", ret); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 27814f3a56f..7e8cd774ae2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -78,10 +78,12 @@ enum hlsl_type_class + HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, + HLSL_CLASS_STRUCT, + HLSL_CLASS_ARRAY, ++ HLSL_CLASS_DEPTH_STENCIL_STATE, + HLSL_CLASS_DEPTH_STENCIL_VIEW, + HLSL_CLASS_EFFECT_GROUP, + HLSL_CLASS_PASS, + HLSL_CLASS_PIXEL_SHADER, ++ HLSL_CLASS_RASTERIZER_STATE, + HLSL_CLASS_RENDER_TARGET_VIEW, + HLSL_CLASS_SAMPLER, + HLSL_CLASS_STRING, +@@ -89,6 +91,11 @@ enum hlsl_type_class + HLSL_CLASS_TEXTURE, + HLSL_CLASS_UAV, + HLSL_CLASS_VERTEX_SHADER, ++ HLSL_CLASS_COMPUTE_SHADER, ++ HLSL_CLASS_DOMAIN_SHADER, ++ HLSL_CLASS_HULL_SHADER, ++ HLSL_CLASS_GEOMETRY_SHADER, ++ HLSL_CLASS_CONSTANT_BUFFER, + HLSL_CLASS_VOID, + }; + +@@ -222,6 +229,8 @@ struct hlsl_semantic + const char *name; + uint32_t index; + ++ /* Name exactly as it appears in the sources. */ ++ const char *raw_name; + /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ + bool reported_missing; + /* In case the variable or field that stores this semantic has already reported to use a +@@ -259,8 +268,20 @@ struct hlsl_struct_field + * struct. */ + struct hlsl_reg + { +- /* Index of the first register allocated. */ ++ /* Register number of the first register allocated. */ + uint32_t id; ++ /* For descriptors (buffer, texture, sampler, UAV) this is the base binding ++ * index of the descriptor. 
++ * For 5.1 and above descriptors have space and may be arrayed, in which ++ * case the array shares a single register ID but has a range of register ++ * indices, and "id" and "index" are as a rule not equal. ++ * For versions below 5.1, the register number for descriptors is the same ++ * as its external binding index, so only "index" is used, and "id" is ++ * ignored. ++ * For numeric registers "index" is not used. */ ++ uint32_t index; ++ /* Register space of a descriptor. Not used for numeric registers. */ ++ uint32_t space; + /* Number of registers to be allocated. + * Unlike the variable's type's regsize, it is not expressed in register components, but rather + * in whole registers, and may depend on which components are used within the shader. */ +@@ -289,6 +310,7 @@ enum hlsl_ir_node_type + HLSL_IR_JUMP, + HLSL_IR_RESOURCE_LOAD, + HLSL_IR_RESOURCE_STORE, ++ HLSL_IR_STRING_CONSTANT, + HLSL_IR_STORE, + HLSL_IR_SWIZZLE, + HLSL_IR_SWITCH, +@@ -371,6 +393,7 @@ struct hlsl_attribute + #define HLSL_STORAGE_LINEAR 0x00010000 + #define HLSL_MODIFIER_SINGLE 0x00020000 + #define HLSL_MODIFIER_EXPORT 0x00040000 ++#define HLSL_STORAGE_ANNOTATION 0x00080000 + + #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ + HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ +@@ -396,6 +419,14 @@ struct hlsl_reg_reservation + unsigned int offset_index; + }; + ++union hlsl_constant_value_component ++{ ++ uint32_t u; ++ int32_t i; ++ float f; ++ double d; ++}; ++ + struct hlsl_ir_var + { + struct hlsl_type *data_type; +@@ -418,6 +449,15 @@ struct hlsl_ir_var + /* Scope that contains annotations for this variable. */ + struct hlsl_scope *annotations; + ++ /* Array of default values the variable was initialized with, one for each component. ++ * Only for variables that need it, such as uniforms and variables inside constant buffers. ++ * This pointer is NULL for others. */ ++ struct hlsl_default_value ++ { ++ /* Default value, in case the component is a numeric value. */ ++ union hlsl_constant_value_component value; ++ } *default_values; ++ + /* A dynamic array containing the state block on the variable's declaration, if any. + * An array variable may contain multiple state blocks. + * A technique pass will always contain one. +@@ -460,6 +500,8 @@ struct hlsl_ir_var + uint32_t is_uniform : 1; + uint32_t is_param : 1; + uint32_t is_separated_resource : 1; ++ uint32_t is_synthetic : 1; ++ uint32_t has_explicit_bind_point : 1; + }; + + /* This struct is used to represent assignments in state block entries: +@@ -470,22 +512,31 @@ struct hlsl_ir_var + * name[lhs_index] = args[0] + * - or - + * name[lhs_index] = {args[0], args[1], ...}; ++ * ++ * This struct also represents function call syntax: ++ * name(args[0], args[1], ...) + */ + struct hlsl_state_block_entry + { +- /* For assignments, the name in the lhs. */ ++ /* Whether this entry is a function call. */ ++ bool is_function_call; ++ ++ /* For assignments, the name in the lhs. ++ * For functions, the name of the function. */ + char *name; + /* Resolved format-specific property identifier. */ + unsigned int name_id; + +- /* Whether the lhs in the assignment is indexed and, in that case, its index. */ ++ /* For assignments, whether the lhs of an assignment is indexed and, in ++ * that case, its index. */ + bool lhs_has_index; + unsigned int lhs_index; + +- /* Instructions present in the rhs. */ ++ /* Instructions present in the rhs or the function arguments. 
*/ + struct hlsl_block *instrs; + +- /* For assignments, arguments of the rhs initializer. */ ++ /* For assignments, arguments of the rhs initializer. ++ * For function calls, the arguments themselves. */ + struct hlsl_src *args; + unsigned int args_count; + }; +@@ -556,12 +607,21 @@ struct hlsl_ir_if + struct hlsl_block else_block; + }; + ++enum hlsl_ir_loop_unroll_type ++{ ++ HLSL_IR_LOOP_UNROLL, ++ HLSL_IR_LOOP_FORCE_UNROLL, ++ HLSL_IR_LOOP_FORCE_LOOP ++}; ++ + struct hlsl_ir_loop + { + struct hlsl_ir_node node; + /* loop condition is stored in the body (as "if (!condition) break;") */ + struct hlsl_block body; + unsigned int next_index; /* liveness index of the end of the loop */ ++ unsigned int unroll_limit; ++ enum hlsl_ir_loop_unroll_type unroll_type; + }; + + struct hlsl_ir_switch_case +@@ -583,13 +643,14 @@ struct hlsl_ir_switch + enum hlsl_ir_expr_op + { + HLSL_OP0_VOID, ++ HLSL_OP0_RASTERIZER_SAMPLE_COUNT, + + HLSL_OP1_ABS, + HLSL_OP1_BIT_NOT, + HLSL_OP1_CAST, + HLSL_OP1_CEIL, + HLSL_OP1_COS, +- HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ ++ HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */ + HLSL_OP1_DSX, + HLSL_OP1_DSX_COARSE, + HLSL_OP1_DSX_FINE, +@@ -597,6 +658,7 @@ enum hlsl_ir_expr_op + HLSL_OP1_DSY_COARSE, + HLSL_OP1_DSY_FINE, + HLSL_OP1_EXP2, ++ HLSL_OP1_F16TOF32, + HLSL_OP1_FLOOR, + HLSL_OP1_FRACT, + HLSL_OP1_LOG2, +@@ -610,7 +672,7 @@ enum hlsl_ir_expr_op + HLSL_OP1_SAT, + HLSL_OP1_SIGN, + HLSL_OP1_SIN, +- HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ ++ HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ + HLSL_OP1_SQRT, + HLSL_OP1_TRUNC, + +@@ -643,6 +705,7 @@ enum hlsl_ir_expr_op + * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ + HLSL_OP3_CMP, + HLSL_OP3_TERNARY, ++ HLSL_OP3_MAD, + }; + + #define HLSL_MAX_OPERANDS 3 +@@ -775,18 +838,18 @@ struct hlsl_ir_constant + struct hlsl_ir_node node; + struct hlsl_constant_value + { +- union hlsl_constant_value_component +- { +- uint32_t u; +- int32_t i; +- float f; +- double d; +- } u[4]; ++ union hlsl_constant_value_component u[4]; + } value; + /* Constant register of type 'c' where the constant value is stored for SM1. */ + struct hlsl_reg reg; + }; + ++struct hlsl_ir_string_constant ++{ ++ struct hlsl_ir_node node; ++ char *string; ++}; ++ + /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, + * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ + struct hlsl_ir_stateblock_constant +@@ -811,6 +874,8 @@ struct hlsl_scope + bool loop; + /* The scope was created for the switch statement. */ + bool _switch; ++ /* The scope contains annotation variables. */ ++ bool annotations; + }; + + struct hlsl_profile_info +@@ -931,6 +996,7 @@ struct hlsl_ctx + /* matrix[HLSL_TYPE_FLOAT][1][3] is a float4x2, i.e. dimx = 2, dimy = 4 */ + struct hlsl_type *matrix[HLSL_TYPE_LAST_SCALAR + 1][4][4]; + struct hlsl_type *sampler[HLSL_SAMPLER_DIM_LAST_SAMPLER + 1]; ++ struct hlsl_type *string; + struct hlsl_type *Void; + } builtin_types; + +@@ -948,6 +1014,8 @@ struct hlsl_ctx + } *regs; + size_t count, size; + } constant_defs; ++ /* 'c' registers where the constants expected by SM2 sincos are stored. */ ++ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; + /* Number of temp. registers required for the shader to run, i.e. the largest temp register + * index that will be used in the output bytecode (+1). 
*/ + uint32_t temp_count; +@@ -994,85 +1062,91 @@ struct hlsl_resource_load_params + + static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_CALL); ++ VKD3D_ASSERT(node->type == HLSL_IR_CALL); + return CONTAINING_RECORD(node, struct hlsl_ir_call, node); + } + + static inline struct hlsl_ir_constant *hlsl_ir_constant(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_CONSTANT); ++ VKD3D_ASSERT(node->type == HLSL_IR_CONSTANT); + return CONTAINING_RECORD(node, struct hlsl_ir_constant, node); + } + ++static inline struct hlsl_ir_string_constant *hlsl_ir_string_constant(const struct hlsl_ir_node *node) ++{ ++ VKD3D_ASSERT(node->type == HLSL_IR_STRING_CONSTANT); ++ return CONTAINING_RECORD(node, struct hlsl_ir_string_constant, node); ++} ++ + static inline struct hlsl_ir_expr *hlsl_ir_expr(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_EXPR); ++ VKD3D_ASSERT(node->type == HLSL_IR_EXPR); + return CONTAINING_RECORD(node, struct hlsl_ir_expr, node); + } + + static inline struct hlsl_ir_if *hlsl_ir_if(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_IF); ++ VKD3D_ASSERT(node->type == HLSL_IR_IF); + return CONTAINING_RECORD(node, struct hlsl_ir_if, node); + } + + static inline struct hlsl_ir_jump *hlsl_ir_jump(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_JUMP); ++ VKD3D_ASSERT(node->type == HLSL_IR_JUMP); + return CONTAINING_RECORD(node, struct hlsl_ir_jump, node); + } + + static inline struct hlsl_ir_load *hlsl_ir_load(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_LOAD); ++ VKD3D_ASSERT(node->type == HLSL_IR_LOAD); + return CONTAINING_RECORD(node, struct hlsl_ir_load, node); + } + + static inline struct hlsl_ir_loop *hlsl_ir_loop(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_LOOP); ++ VKD3D_ASSERT(node->type == HLSL_IR_LOOP); + return CONTAINING_RECORD(node, struct hlsl_ir_loop, node); + } + + static inline struct hlsl_ir_resource_load *hlsl_ir_resource_load(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_RESOURCE_LOAD); ++ VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_LOAD); + return CONTAINING_RECORD(node, struct hlsl_ir_resource_load, node); + } + + static inline struct hlsl_ir_resource_store *hlsl_ir_resource_store(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_RESOURCE_STORE); ++ VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_STORE); + return CONTAINING_RECORD(node, struct hlsl_ir_resource_store, node); + } + + static inline struct hlsl_ir_store *hlsl_ir_store(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_STORE); ++ VKD3D_ASSERT(node->type == HLSL_IR_STORE); + return CONTAINING_RECORD(node, struct hlsl_ir_store, node); + } + + static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_SWIZZLE); ++ VKD3D_ASSERT(node->type == HLSL_IR_SWIZZLE); + return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); + } + + static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_INDEX); ++ VKD3D_ASSERT(node->type == HLSL_IR_INDEX); + return CONTAINING_RECORD(node, struct hlsl_ir_index, node); + } + + static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_SWITCH); ++ VKD3D_ASSERT(node->type == HLSL_IR_SWITCH); + return CONTAINING_RECORD(node, struct hlsl_ir_switch, 
node); + } + + static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) + { +- assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); ++ VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); + return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); + } + +@@ -1249,6 +1323,13 @@ void hlsl_block_cleanup(struct hlsl_block *block); + bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); + + void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); ++void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); ++ ++bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, ++ const struct vkd3d_shader_location *loc); ++struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, ++ struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, ++ unsigned int lhs_index, unsigned int arg_index); + + void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, +@@ -1259,7 +1340,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d + bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); + + void hlsl_cleanup_deref(struct hlsl_deref *deref); ++ + void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); ++bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); + + void hlsl_cleanup_ir_switch_cases(struct list *cases); + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); +@@ -1270,6 +1353,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr); + void hlsl_free_instr(struct hlsl_ir_node *node); + void hlsl_free_instr_list(struct list *list); + void hlsl_free_state_block(struct hlsl_state_block *state_block); ++void hlsl_free_state_block_entry(struct hlsl_state_block_entry *state_block_entry); + void hlsl_free_type(struct hlsl_type *type); + void hlsl_free_var(struct hlsl_ir_var *decl); + +@@ -1342,7 +1426,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); + struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, + struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, +- struct hlsl_block *block, const struct vkd3d_shader_location *loc); ++ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, + const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, +@@ -1353,6 +1437,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned + struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, + struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, ++ const struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, + struct hlsl_type *type, const struct 
vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, +@@ -1361,6 +1447,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ + unsigned int sample_count); + struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, + struct hlsl_type *format, bool rasteriser_ordered); ++struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); + struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, + const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, +@@ -1432,10 +1519,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, + + D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); + D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); +-bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); +-bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); +-int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); ++bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, ++ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); ++bool hlsl_sm1_usage_from_semantic(const char *semantic_name, ++ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); ++ ++void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); ++int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, ++ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, ++ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + + bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, + const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index a5923d8bf8e..b4db142f6c2 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -29,6 +29,8 @@ + + static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); + ++static void apply_escape_sequences(char *str); ++ + #define YY_USER_ACTION update_location(yyget_extra(yyscanner), yyget_lloc(yyscanner)); + + %} +@@ -49,11 +51,11 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); + RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum + RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public + RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try +-RESERVED4 typename|union|unsigned|using|virtual ++RESERVED4 typename|union|using|virtual + + WS [ \t] + NEWLINE (\n)|(\r\n) +-STRING \"[^\"]*\" ++STRING \"([^\"\\]|\\.)*\" + IDENTIFIER [A-Za-z_][A-Za-z0-9_]* + + ANY (.) 
+@@ -164,6 +166,7 @@ textureCUBE {return KW_TEXTURECUBE; } + TextureCubeArray {return KW_TEXTURECUBEARRAY; } + true {return KW_TRUE; } + typedef {return KW_TYPEDEF; } ++unsigned {return KW_UNSIGNED; } + uniform {return KW_UNIFORM; } + vector {return KW_VECTOR; } + VertexShader {return KW_VERTEXSHADER; } +@@ -197,7 +200,9 @@ while {return KW_WHILE; } + struct hlsl_ctx *ctx = yyget_extra(yyscanner); + + yylval->name = hlsl_strdup(ctx, yytext); +- if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) ++ if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) ++ return KW_CONSTANTBUFFER; ++ else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) + return VAR_IDENTIFIER; + else if (hlsl_get_type(ctx->cur_scope, yytext, true, true)) + return TYPE_IDENTIFIER; +@@ -205,6 +210,16 @@ while {return KW_WHILE; } + return NEW_IDENTIFIER; + } + ++{STRING} { ++ struct hlsl_ctx *ctx = yyget_extra(yyscanner); ++ char *string = hlsl_strdup(ctx, yytext + 1); ++ ++ string[strlen(string) - 1] = 0; ++ apply_escape_sequences(string); ++ yylval->name = string; ++ return STRING; ++ } ++ + [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[h|H|f|F]? { + yylval->floatval = atof(yytext); + return C_FLOAT; +@@ -289,6 +304,7 @@ while {return KW_WHILE; } + + BEGIN(pp_ignore); + string[strlen(string) - 1] = 0; ++ apply_escape_sequences(string); + yylval->name = string; + return STRING; + } +@@ -338,3 +354,115 @@ int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hls + yylex_destroy(ctx->scanner); + return ret; + } ++ ++static void apply_escape_sequences(char *str) ++{ ++ unsigned int i = 0, k = 0, r; ++ ++ while (str[i]) ++ { ++ unsigned char v = 0; ++ ++ if (str[i] != '\\') ++ { ++ str[k++] = str[i]; ++ ++i; ++ continue; ++ } ++ ++ ++i; ++ VKD3D_ASSERT(str[i]); ++ ++ if ('0' <= str[i] && str[i] <= '7') ++ { ++ /* octal, up to 3 digits. 
*/ ++ for (r = 0; r < 3; ++r) ++ { ++ char c = str[i]; ++ ++ if ('0' <= c && c <= '7') ++ { ++ v = v << 3; ++ v += c - '0'; ++ ++i; ++ } ++ else ++ break; ++ } ++ str[k++] = v; ++ continue; ++ } ++ ++ if (str[i] == 'x') ++ { ++ bool number = false; ++ ++ /* hexadecimal */ ++ ++i; ++ while (1) ++ { ++ char c = str[i]; ++ ++ if ('0' <= c && c <= '9') ++ { ++ v = v << 4; ++ v += c - '0'; ++ number = true; ++ ++i; ++ } ++ else if ('a' <= c && c <= 'f') ++ { ++ v = v << 4; ++ v += c - 'a' + 10; ++ number = true; ++ ++i; ++ } ++ else if ('A' <= c && c <= 'F') ++ { ++ v = v << 4; ++ v += c - 'A' + 10; ++ number = true; ++ ++i; ++ } ++ else ++ break; ++ } ++ if (number) ++ str[k++] = v; ++ else ++ str[k++] = 'x'; ++ continue; ++ } ++ ++ switch (str[i]) ++ { ++ case 'a': ++ str[k++] = '\a'; ++ break; ++ case 'b': ++ str[k++] = '\b'; ++ break; ++ case 'f': ++ str[k++] = '\f'; ++ break; ++ case 'n': ++ str[k++] = '\n'; ++ break; ++ case 'r': ++ str[k++] = '\r'; ++ break; ++ case 't': ++ str[k++] = '\t'; ++ break; ++ case 'v': ++ str[k++] = '\v'; ++ break; ++ ++ default: ++ str[k++] = str[i]; ++ break; ++ } ++ ++i; ++ } ++ str[k++] = '\0'; ++} +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 9c1bdef926d..30bd53d0c49 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -34,6 +34,14 @@ struct parse_fields + size_t count, capacity; + }; + ++struct parse_initializer ++{ ++ struct hlsl_ir_node **args; ++ unsigned int args_count; ++ struct hlsl_block *instrs; ++ bool braces; ++}; ++ + struct parse_parameter + { + struct hlsl_type *type; +@@ -41,6 +49,7 @@ struct parse_parameter + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; + uint32_t modifiers; ++ struct parse_initializer initializer; + }; + + struct parse_colon_attribute +@@ -49,14 +58,6 @@ struct parse_colon_attribute + struct hlsl_reg_reservation reg_reservation; + }; + +-struct parse_initializer +-{ +- struct hlsl_ir_node **args; +- unsigned int args_count; +- struct hlsl_block *instrs; +- bool braces; +-}; +- + struct parse_array_sizes + { + uint32_t *sizes; /* innermost first */ +@@ -73,6 +74,7 @@ struct parse_variable_def + struct hlsl_semantic semantic; + struct hlsl_reg_reservation reg_reservation; + struct parse_initializer initializer; ++ struct hlsl_scope *annotations; + + struct hlsl_type *basic_type; + uint32_t modifiers; +@@ -342,11 +344,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl + broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; + matrix_cast = !broadcast && dst_comp_count != src_comp_count + && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; +- assert(src_comp_count >= dst_comp_count || broadcast); ++ VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); + if (matrix_cast) + { +- assert(dst_type->dimx <= src_type->dimx); +- assert(dst_type->dimy <= src_type->dimy); ++ VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); ++ VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); + } + + if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) +@@ -573,12 +575,96 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); + } + ++static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, ++ struct hlsl_block *block, struct 
hlsl_type *dst_type, const struct vkd3d_shader_location *loc) ++{ ++ union hlsl_constant_value_component ret = {0}; ++ struct hlsl_ir_constant *constant; ++ struct hlsl_ir_node *node; ++ struct hlsl_block expr; ++ struct hlsl_src src; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (node->type) ++ { ++ case HLSL_IR_CONSTANT: ++ case HLSL_IR_EXPR: ++ case HLSL_IR_STRING_CONSTANT: ++ case HLSL_IR_SWIZZLE: ++ case HLSL_IR_LOAD: ++ case HLSL_IR_INDEX: ++ continue; ++ case HLSL_IR_STORE: ++ if (hlsl_ir_store(node)->lhs.var->is_synthetic) ++ break; ++ /* fall-through */ ++ case HLSL_IR_CALL: ++ case HLSL_IR_IF: ++ case HLSL_IR_LOOP: ++ case HLSL_IR_JUMP: ++ case HLSL_IR_RESOURCE_LOAD: ++ case HLSL_IR_RESOURCE_STORE: ++ case HLSL_IR_SWITCH: ++ case HLSL_IR_STATEBLOCK_CONSTANT: ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Expected literal expression."); ++ break; ++ } ++ } ++ ++ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) ++ return ret; ++ hlsl_block_add_block(&expr, block); ++ ++ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) ++ { ++ hlsl_block_cleanup(&expr); ++ return ret; ++ } ++ ++ /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ ++ hlsl_src_from_node(&src, node_from_block(&expr)); ++ hlsl_run_const_passes(ctx, &expr); ++ node = src.node; ++ hlsl_src_remove(&src); ++ ++ if (node->type == HLSL_IR_CONSTANT) ++ { ++ constant = hlsl_ir_constant(node); ++ ret = constant->value.u[0]; ++ } ++ else if (node->type == HLSL_IR_STRING_CONSTANT) ++ { ++ hlsl_fixme(ctx, &node->loc, "Evaluate string constants as static expressions."); ++ } ++ else ++ { ++ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Failed to evaluate constant expression."); ++ } ++ ++ hlsl_block_cleanup(&expr); ++ ++ return ret; ++} ++ ++static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ const struct vkd3d_shader_location *loc) ++{ ++ union hlsl_constant_value_component res; ++ ++ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); ++ return res.u; ++} ++ + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) + { ++ enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; ++ unsigned int i, unroll_limit = 0; + struct hlsl_ir_node *loop; +- unsigned int i; + + if (attribute_list_has_duplicates(attributes)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); +@@ -591,18 +677,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct hlsl_attribute *attr = attributes->attrs[i]; + if (!strcmp(attr->name, "unroll")) + { +- if (attr->args_count) ++ if (attr->args_count > 1) + { +- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); ++ hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, ++ "Ignoring 'unroll' attribute with more than 1 argument."); ++ continue; + } +- else ++ ++ if (attr->args_count == 1) + { +- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); ++ struct hlsl_block expr; ++ hlsl_block_init(&expr); ++ if 
(!hlsl_clone_block(ctx, &expr, &attr->instrs)) ++ return NULL; ++ ++ unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); ++ hlsl_block_cleanup(&expr); + } ++ ++ unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; + } + else if (!strcmp(attr->name, "loop")) + { +- /* TODO: this attribute will be used to disable unrolling, once it's implememented. */ ++ unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + } + else if (!strcmp(attr->name, "fastopt") + || !strcmp(attr->name, "allow_uav_condition")) +@@ -631,7 +728,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + else + list_move_head(&body->instrs, &cond->instrs); + +- if (!(loop = hlsl_new_loop(ctx, body, loc))) ++ if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) + goto oom; + hlsl_block_add_instr(init, loop); + +@@ -663,7 +760,7 @@ static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) + { + unsigned int i = 0; + +- assert(attr_list); ++ VKD3D_ASSERT(attr_list); + for (i = 0; i < attr_list->count; ++i) + hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); + vkd3d_free(attr_list->attrs); +@@ -823,7 +920,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, st + { + struct hlsl_ir_node *index, *c; + +- assert(idx < record->data_type->e.record.field_count); ++ VKD3D_ASSERT(idx < record->data_type->e.record.field_count); + + if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) + return false; +@@ -953,7 +1050,7 @@ static void free_parse_variable_def(struct parse_variable_def *v) + vkd3d_free(v->arrays.sizes); + vkd3d_free(v->name); + hlsl_cleanup_semantic(&v->semantic); +- assert(!v->state_blocks); ++ VKD3D_ASSERT(!v->state_blocks); + vkd3d_free(v); + } + +@@ -964,7 +1061,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + size_t i = 0; + + if (type->class == HLSL_CLASS_MATRIX) +- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + memset(fields, 0, sizeof(*fields)); + fields->count = list_count(defs); +@@ -1013,6 +1110,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, + field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); + } + } ++ ++ if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) ++ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); ++ + vkd3d_free(v->arrays.sizes); + field->loc = v->loc; + field->name = v->name; +@@ -1094,13 +1195,16 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, + return true; + } + ++static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, ++ struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); ++ + static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, + struct parse_parameter *param, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_var *var; + + if (param->type->class == HLSL_CLASS_MATRIX) +- assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ VKD3D_ASSERT(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, +@@ -1110,11 +1214,52 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters + hlsl_error(ctx, loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is not allowed on function parameters."); + ++ if (parameters->count && parameters->vars[parameters->count - 1]->default_values ++ && !param->initializer.args_count) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, ++ "Missing default value for parameter '%s'.", param->name); ++ ++ if (param->initializer.args_count && (param->modifiers & HLSL_STORAGE_OUT)) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, ++ "Output parameter '%s' has a default value.", param->name); ++ + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, + ¶m->reg_reservation))) + return false; + var->is_param = 1; + ++ if (param->initializer.args_count) ++ { ++ unsigned int component_count = hlsl_type_component_count(param->type); ++ unsigned int store_index = 0; ++ unsigned int size, i; ++ ++ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) ++ return false; ++ ++ if (!param->initializer.braces) ++ { ++ if (!(add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc))) ++ return false; ++ ++ param->initializer.args[0] = node_from_block(param->initializer.instrs); ++ } ++ ++ size = initializer_size(¶m->initializer); ++ if (component_count != size) ++ { ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u components in initializer, but got %u.", component_count, size); ++ } ++ ++ for (i = 0; i < param->initializer.args_count; ++i) ++ { ++ initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); ++ } ++ ++ free_parse_initializer(¶m->initializer); ++ } ++ + if (!hlsl_add_var(ctx, var, false)) + { + hlsl_free_var(var); +@@ -1210,12 +1355,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl + return true; + } + +-static bool parse_reservation_index(const char *string, char *type, uint32_t *index) ++static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, ++ struct hlsl_reg_reservation *reservation) + { +- if (!sscanf(string + 1, "%u", index)) +- return false; ++ char *endptr; ++ ++ reservation->reg_type = ascii_tolower(string[0]); ++ ++ /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. */ ++ if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) ++ { ++ bracket_offset = 0; ++ } ++ ++ if (string[1] == '\0') ++ { ++ reservation->reg_index = bracket_offset; ++ return true; ++ } ++ ++ reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; ++ ++ if (*endptr) ++ { ++ /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, ++ * setting index to -1. It will later fail while validating slot limits. */ ++ if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) ++ { ++ reservation->reg_index = -1; ++ return true; ++ } ++ ++ /* All other types tolerate leftover characters. 
*/ ++ if (endptr == string + 1) ++ return false; ++ } + +- *type = ascii_tolower(string[0]); + return true; + } + +@@ -1286,72 +1461,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * + return block; + } + +-static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, +- const struct vkd3d_shader_location *loc) +-{ +- struct hlsl_ir_constant *constant; +- struct hlsl_ir_node *node; +- struct hlsl_block expr; +- unsigned int ret = 0; +- struct hlsl_src src; +- +- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) +- { +- switch (node->type) +- { +- case HLSL_IR_CONSTANT: +- case HLSL_IR_EXPR: +- case HLSL_IR_SWIZZLE: +- case HLSL_IR_LOAD: +- case HLSL_IR_INDEX: +- continue; +- case HLSL_IR_CALL: +- case HLSL_IR_IF: +- case HLSL_IR_LOOP: +- case HLSL_IR_JUMP: +- case HLSL_IR_RESOURCE_LOAD: +- case HLSL_IR_RESOURCE_STORE: +- case HLSL_IR_STORE: +- case HLSL_IR_SWITCH: +- case HLSL_IR_STATEBLOCK_CONSTANT: +- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Expected literal expression."); +- } +- } +- +- if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) +- return 0; +- hlsl_block_add_block(&expr, block); +- +- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), +- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) +- { +- hlsl_block_cleanup(&expr); +- return 0; +- } +- +- /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ +- hlsl_src_from_node(&src, node_from_block(&expr)); +- hlsl_run_const_passes(ctx, &expr); +- node = src.node; +- hlsl_src_remove(&src); +- +- if (node->type == HLSL_IR_CONSTANT) +- { +- constant = hlsl_ir_constant(node); +- ret = constant->value.u[0].u; +- } +- else +- { +- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, +- "Failed to evaluate constant expression."); +- } +- +- hlsl_block_cleanup(&expr); +- +- return ret; +-} +- + static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) + { + /* Scalar vars can be converted to pretty much everything */ +@@ -1862,12 +1971,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned + return true; + } + ++static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) ++{ ++ /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. ++ * components are indexed by their sources. i.e. the first component comes from the first ++ * component of the rhs. */ ++ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; ++ ++ /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ ++ for (i = 0; i < 4; ++i) ++ { ++ if (*writemask & (1 << i)) ++ { ++ unsigned int s = (*swizzle >> (i * 8)) & 0xff; ++ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; ++ unsigned int idx = x + y * 4; ++ new_swizzle |= s << (bit++ * 8); ++ if (new_writemask & (1 << idx)) ++ return false; ++ new_writemask |= 1 << idx; ++ } ++ } ++ width = bit; ++ ++ /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the ++ * incoming vector. 
*/ ++ bit = 0; ++ for (i = 0; i < 16; ++i) ++ { ++ for (j = 0; j < width; ++j) ++ { ++ unsigned int s = (new_swizzle >> (j * 8)) & 0xff; ++ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; ++ unsigned int idx = x + y * 4; ++ if (idx == i) ++ inverted |= j << (bit++ * 2); ++ } ++ } ++ ++ *swizzle = inverted; ++ *writemask = new_writemask; ++ *ret_width = width; ++ return true; ++} ++ + static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, + enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) + { + struct hlsl_type *lhs_type = lhs->data_type; + struct hlsl_ir_node *copy; +- unsigned int writemask = 0; ++ unsigned int writemask = 0, width = 0; ++ bool matrix_writemask = false; + + if (assign_op == ASSIGN_OP_SUB) + { +@@ -1879,13 +2033,16 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + { + enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + +- assert(op); ++ VKD3D_ASSERT(op); + if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) + return NULL; + } + + if (hlsl_is_numeric_type(lhs_type)) ++ { + writemask = (1 << lhs_type->dimx) - 1; ++ width = lhs_type->dimx; ++ } + + if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) + return NULL; +@@ -1902,12 +2059,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); + struct hlsl_ir_node *new_swizzle; + uint32_t s = swizzle->swizzle; +- unsigned int width; + +- if (lhs->data_type->class == HLSL_CLASS_MATRIX) +- hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); ++ VKD3D_ASSERT(!matrix_writemask); + +- if (!invert_swizzle(&s, &writemask, &width)) ++ if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) ++ { ++ if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) ++ { ++ hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); ++ return NULL; ++ } ++ if (!invert_swizzle_matrix(&s, &writemask, &width)) ++ { ++ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); ++ return NULL; ++ } ++ matrix_writemask = true; ++ } ++ else if (!invert_swizzle(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); + return NULL; +@@ -1947,7 +2116,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + return NULL; + + resource_type = hlsl_deref_get_type(ctx, &resource_deref); +- assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); ++ VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); + + if (resource_type->class != HLSL_CLASS_UAV) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +@@ -1955,13 +2124,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + + dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); + +- if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) ++ if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, + "Resource store expressions must write to all components."); + +- assert(coords->data_type->class == HLSL_CLASS_VECTOR); +- assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); +- 
assert(coords->data_type->dimx == dim_count); ++ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(coords->data_type->dimx == dim_count); + + if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) + { +@@ -1971,12 +2140,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + hlsl_block_add_instr(block, store); + hlsl_cleanup_deref(&resource_deref); + } ++ else if (matrix_writemask) ++ { ++ struct hlsl_deref deref; ++ unsigned int i, j, k = 0; ++ ++ hlsl_init_deref_from_index_chain(ctx, &deref, lhs); ++ ++ for (i = 0; i < lhs->data_type->dimy; ++i) ++ { ++ for (j = 0; j < lhs->data_type->dimx; ++j) ++ { ++ struct hlsl_ir_node *load; ++ struct hlsl_block store_block; ++ const unsigned int idx = i * 4 + j; ++ const unsigned int component = i * lhs->data_type->dimx + j; ++ ++ if (!(writemask & (1 << idx))) ++ continue; ++ ++ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) ++ { ++ hlsl_cleanup_deref(&deref); ++ return NULL; ++ } ++ ++ if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) ++ { ++ hlsl_cleanup_deref(&deref); ++ return NULL; ++ } ++ hlsl_block_add_block(block, &store_block); ++ } ++ } ++ ++ hlsl_cleanup_deref(&deref); ++ } + else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) + { + struct hlsl_ir_index *row = hlsl_ir_index(lhs); + struct hlsl_ir_node *mat = row->val.node; + unsigned int i, k = 0; + ++ VKD3D_ASSERT(!matrix_writemask); ++ + for (i = 0; i < mat->data_type->dimx; ++i) + { + struct hlsl_ir_node *cell, *load, *store, *c; +@@ -2067,6 +2274,55 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d + return true; + } + ++/* For some reason, for matrices, values from default value initializers end up in different ++ * components than from regular initializers. Default value initializers fill the matrix in ++ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order ++ * (top-to-bottom left-to-right), so they have to be adjusted. ++ * An exception is that the order of matrix initializers for function parameters are row-major ++ * (top-to-bottom left-to-right). 
*/ ++static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, ++ struct hlsl_type *type, unsigned int index) ++{ ++ unsigned int element_comp_count, element, x, y, i; ++ unsigned int base = 0; ++ ++ if (ctx->profile->major_version < 4) ++ return index; ++ ++ if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) ++ return index; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_MATRIX: ++ x = index / type->dimy; ++ y = index % type->dimy; ++ return y * type->dimx + x; ++ ++ case HLSL_CLASS_ARRAY: ++ element_comp_count = hlsl_type_component_count(type->e.array.type); ++ element = index / element_comp_count; ++ base = element * element_comp_count; ++ return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); ++ ++ case HLSL_CLASS_STRUCT: ++ for (i = 0; i < type->e.record.field_count; ++i) ++ { ++ struct hlsl_type *field_type = type->e.record.fields[i].type; ++ ++ element_comp_count = hlsl_type_component_count(field_type); ++ if (index - base < element_comp_count) ++ return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); ++ base += element_comp_count; ++ } ++ break; ++ ++ default: ++ return index; ++ } ++ vkd3d_unreachable(); ++} ++ + static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) + { +@@ -2087,12 +2343,33 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + + dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); + +- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) +- return; ++ if (dst->default_values) ++ { ++ struct hlsl_default_value default_value = {0}; ++ unsigned int dst_index; + +- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) +- return; +- hlsl_block_add_block(instrs, &block); ++ if (!hlsl_clone_block(ctx, &block, instrs)) ++ return; ++ default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); ++ ++ if (dst->is_param) ++ dst_index = *store_index; ++ else ++ dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); ++ ++ dst->default_values[dst_index] = default_value; ++ ++ hlsl_block_cleanup(&block); ++ } ++ else ++ { ++ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) ++ return; ++ ++ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) ++ return; ++ hlsl_block_add_block(instrs, &block); ++ } + + ++*store_index; + } +@@ -2171,16 +2448,17 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + struct hlsl_semantic new_semantic; + uint32_t modifiers = v->modifiers; + bool unbounded_res_array = false; ++ bool constant_buffer = false; + struct hlsl_ir_var *var; + struct hlsl_type *type; + bool local = true; + char *var_name; + unsigned int i; + +- assert(basic_type); ++ VKD3D_ASSERT(basic_type); + + if (basic_type->class == HLSL_CLASS_MATRIX) +- assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ VKD3D_ASSERT(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + + type = basic_type; + +@@ -2190,6 +2468,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + } + ++ if (type->class == HLSL_CLASS_CONSTANT_BUFFER) ++ { ++ type = 
type->e.resource.format; ++ constant_buffer = true; ++ } ++ + if (unbounded_res_array) + { + if (v->arrays.count == 1) +@@ -2246,17 +2530,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + } + } + ++ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) ++ { ++ /* SM 5.1/6.x descriptor arrays act differently from previous versions. ++ * Not only are they treated as a single object in reflection, but they ++ * act as a single component for the purposes of assignment and ++ * initialization. */ ++ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); ++ } ++ + if (!(var_name = vkd3d_strdup(v->name))) + return; + +- new_semantic = v->semantic; +- if (v->semantic.name) ++ if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) + { +- if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) +- { +- vkd3d_free(var_name); +- return; +- } ++ vkd3d_free(var_name); ++ return; + } + + if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) +@@ -2266,7 +2555,18 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + return; + } + +- var->buffer = ctx->cur_buffer; ++ var->annotations = v->annotations; ++ ++ if (constant_buffer && ctx->cur_scope == ctx->globals) ++ { ++ if (!(var_name = vkd3d_strdup(v->name))) ++ return; ++ var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); ++ } ++ else ++ { ++ var->buffer = ctx->cur_buffer; ++ } + + if (var->buffer == ctx->globals_buffer) + { +@@ -2289,8 +2589,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + +- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) ++ && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) ++ { + check_invalid_object_fields(ctx, var); ++ } + + if ((func = hlsl_get_first_func_decl(ctx, var->name))) + { +@@ -2323,6 +2626,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, + "Const variable \"%s\" is missing an initializer.", var->name); + } ++ ++ if (var->annotations) ++ { ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Annotations are only allowed for objects in the global scope."); ++ } + } + + if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) +@@ -2348,6 +2657,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + { + struct parse_variable_def *v, *v_next; + struct hlsl_block *initializers; ++ unsigned int component_count; + struct hlsl_ir_var *var; + struct hlsl_type *type; + +@@ -2371,6 +2681,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + } + + type = var->data_type; ++ component_count = hlsl_type_component_count(type); + + var->state_blocks = v->state_blocks; + var->state_block_count = v->state_block_count; +@@ -2379,51 +2690,78 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var + v->state_block_capacity = 0; + v->state_blocks = NULL; + +- if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) ++ if (var->state_blocks && component_count != 
var->state_block_count) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u state blocks, but got %u.", +- hlsl_type_component_count(type), var->state_block_count); ++ "Expected %u state blocks, but got %u.", component_count, var->state_block_count); + free_parse_variable_def(v); + continue; + } + + if (v->initializer.args_count) + { +- if (v->initializer.braces) ++ unsigned int store_index = 0; ++ bool is_default_values_initializer; ++ unsigned int size, k; ++ ++ is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) ++ || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) ++ || ctx->cur_scope->annotations; ++ ++ if (is_default_values_initializer) + { +- unsigned int size = initializer_size(&v->initializer); +- unsigned int store_index = 0; +- unsigned int k; ++ /* Default values might have been allocated already for another variable of the same name, ++ in the same scope. */ ++ if (var->default_values) ++ { ++ free_parse_variable_def(v); ++ continue; ++ } + +- if (hlsl_type_component_count(type) != size) ++ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) + { +- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, +- "Expected %u components in initializer, but got %u.", +- hlsl_type_component_count(type), size); + free_parse_variable_def(v); + continue; + } ++ } + +- for (k = 0; k < v->initializer.args_count; ++k) ++ if (!v->initializer.braces) ++ { ++ if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) + { +- initialize_var_components(ctx, v->initializer.instrs, var, +- &store_index, v->initializer.args[k]); ++ free_parse_variable_def(v); ++ continue; + } ++ ++ v->initializer.args[0] = node_from_block(v->initializer.instrs); + } +- else ++ ++ size = initializer_size(&v->initializer); ++ if (component_count != size) + { +- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); ++ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, ++ "Expected %u components in initializer, but got %u.", component_count, size); ++ free_parse_variable_def(v); ++ continue; ++ } + +- assert(v->initializer.args_count == 1); +- hlsl_block_add_instr(v->initializer.instrs, &load->node); +- add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); ++ for (k = 0; k < v->initializer.args_count; ++k) ++ { ++ initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); + } + +- if (var->storage_modifiers & HLSL_STORAGE_STATIC) ++ if (is_default_values_initializer) ++ { ++ hlsl_dump_var_default_values(var); ++ } ++ else if (var->storage_modifiers & HLSL_STORAGE_STATIC) ++ { + hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); ++ } + else ++ { + hlsl_block_add_block(initializers, v->initializer.instrs); ++ } + } + else if (var->storage_modifiers & HLSL_STORAGE_STATIC) + { +@@ -2469,14 +2807,18 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, + { + unsigned int i; + +- if (decl->parameters.count != args->args_count) ++ if (decl->parameters.count < args->args_count) + return false; + +- for (i = 0; i < decl->parameters.count; ++i) ++ for (i = 0; i < args->args_count; ++i) + { + if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) + return false; + } ++ ++ if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) ++ 
return false; ++ + return true; + } + +@@ -2519,11 +2861,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu + const struct parse_initializer *args, const struct vkd3d_shader_location *loc) + { + struct hlsl_ir_node *call; +- unsigned int i; ++ unsigned int i, j; + +- assert(args->args_count == func->parameters.count); ++ VKD3D_ASSERT(args->args_count <= func->parameters.count); + +- for (i = 0; i < func->parameters.count; ++i) ++ for (i = 0; i < args->args_count; ++i) + { + struct hlsl_ir_var *param = func->parameters.vars[i]; + struct hlsl_ir_node *arg = args->args[i]; +@@ -2548,11 +2890,40 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu + } + } + ++ /* Add default values for the remaining parameters. */ ++ for (i = args->args_count; i < func->parameters.count; ++i) ++ { ++ struct hlsl_ir_var *param = func->parameters.vars[i]; ++ unsigned int comp_count = hlsl_type_component_count(param->data_type); ++ struct hlsl_deref param_deref; ++ ++ VKD3D_ASSERT(param->default_values); ++ ++ hlsl_init_simple_deref_from_var(¶m_deref, param); ++ ++ for (j = 0; j < comp_count; ++j) ++ { ++ struct hlsl_type *type = hlsl_type_get_component_type(ctx, param->data_type, j); ++ struct hlsl_constant_value value; ++ struct hlsl_ir_node *comp; ++ struct hlsl_block store_block; ++ ++ value.u[0] = param->default_values[j].value; ++ if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, comp); ++ ++ if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) ++ return false; ++ hlsl_block_add_block(args->instrs, &store_block); ++ } ++ } ++ + if (!(call = hlsl_new_call(ctx, func, loc))) + return false; + hlsl_block_add_instr(args->instrs, call); + +- for (i = 0; i < func->parameters.count; ++i) ++ for (i = 0; i < args->args_count; ++i) + { + struct hlsl_ir_var *param = func->parameters.vars[i]; + struct hlsl_ir_node *arg = args->args[i]; +@@ -2699,6 +3070,19 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, + return convert_args(ctx, params, type, loc); + } + ++static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_type *type; ++ ++ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) ++ return false; ++ ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); ++ ++ return convert_args(ctx, params, type, loc); ++} ++ + static bool intrinsic_abs(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -2923,11 +3307,34 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", ++ "Wrong argument type of asfloat(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", ++ string->buffer); ++ hlsl_release_string_buffer(ctx, string); ++ } ++ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); ++ ++ operands[0] = params->args[0]; ++ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); ++} ++ ++static bool intrinsic_asint(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node 
*operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *data_type; ++ ++ data_type = params->args[0]->data_type; ++ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) ++ { ++ struct vkd3d_string_buffer *string; ++ ++ if ((string = hlsl_type_to_string(ctx, data_type))) ++ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Wrong argument type of asint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } +- data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); ++ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_INT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); +@@ -3353,6 +3760,49 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); + } + ++static bool intrinsic_faceforward(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_function_decl *func; ++ struct hlsl_type *type; ++ char *body; ++ ++ static const char template[] = ++ "%s faceforward(%s n, %s i, %s ng)\n" ++ "{\n" ++ " return dot(i, ng) < 0 ? n : -n;\n" ++ "}\n"; ++ ++ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) ++ return false; ++ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, ++ type->name, type->name, type->name, type->name))) ++ return false; ++ func = hlsl_compile_internal_function(ctx, "faceforward", body); ++ vkd3d_free(body); ++ if (!func) ++ return false; ++ ++ return add_user_call(ctx, func, params, loc); ++} ++ ++static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_type *type; ++ ++ if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) ++ return false; ++ ++ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT); ++ ++ operands[0] = params->args[0]; ++ return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); ++} ++ + static bool intrinsic_floor(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3646,12 +4096,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, + } + else if (vect_count == 1) + { +- assert(matrix_type->dimx == 1 || matrix_type->dimy == 1); ++ VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); + ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); + } + else + { +- assert(matrix_type->dimx == 1 && matrix_type->dimy == 1); ++ VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); + ret_type = hlsl_get_scalar_type(ctx, base); + } + +@@ -3764,6 +4214,17 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, + return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, rad, loc); + } + ++static bool intrinsic_rcp(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg; ++ ++ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) ++ return false; ++ ++ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); ++} ++ + static bool 
intrinsic_reflect(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -3814,7 +4275,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, + return false; + } + +- assert(params->args_count == 3); ++ VKD3D_ASSERT(params->args_count == 3); + mut_params = *params; + mut_params.args_count = 2; + if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) +@@ -4032,6 +4493,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, + static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, + const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) + { ++ unsigned int sampler_dim = hlsl_sampler_dim_count(dim); + struct hlsl_resource_load_params load_params = { 0 }; + const struct hlsl_type *sampler_type; + struct hlsl_ir_node *coords, *sample; +@@ -4043,11 +4505,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + return false; + } + +- if (params->args_count == 4) +- { +- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); +- } +- + sampler_type = params->args[0]->data_type; + if (sampler_type->class != HLSL_CLASS_SAMPLER + || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) +@@ -4061,18 +4518,22 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + hlsl_release_string_buffer(ctx, string); + } + +- if (!strcmp(name, "tex2Dlod")) ++ if (!strcmp(name, "tex2Dbias") ++ || !strcmp(name, "tex2Dlod")) + { + struct hlsl_ir_node *lod, *c; + +- load_params.type = HLSL_RESOURCE_SAMPLE_LOD; ++ if (!strcmp(name, "tex2Dlod")) ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; ++ else ++ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; + +- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) ++ if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) + return false; + hlsl_block_add_instr(params->instrs, c); + +- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, +- hlsl_sampler_dim_count(dim)), loc))) ++ if (!(coords = add_implicit_conversion(ctx, params->instrs, c, ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } +@@ -4099,14 +4560,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + + if (hlsl_version_ge(ctx, 4, 0)) + { +- unsigned int count = hlsl_sampler_dim_count(dim); + struct hlsl_ir_node *divisor; + +- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) ++ if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) + return false; + hlsl_block_add_instr(params->instrs, divisor); + +- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) ++ if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) + return false; + hlsl_block_add_instr(params->instrs, coords); + +@@ -4120,12 +4580,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * + load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; + } + } ++ else if (params->args_count == 4) /* Gradient sampling. 
*/ ++ { ++ if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ { ++ return false; ++ } ++ ++ if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ { ++ return false; ++ } ++ ++ if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) ++ { ++ return false; ++ } ++ ++ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; ++ } + else + { + load_params.type = HLSL_RESOURCE_SAMPLE; + + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], +- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) ++ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } +@@ -4181,12 +4663,30 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); + } + ++static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); ++} ++ + static bool intrinsic_tex2D(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { + return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); + } + ++static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); ++} ++ ++static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); ++} ++ + static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4205,6 +4705,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); + } + ++static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); ++} ++ + static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4217,6 +4723,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, + return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); + } + ++static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); ++} ++ + static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) + { +@@ -4336,6 +4848,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, + return true; + } + ++static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, ++ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *expr; ++ ++ if (!(expr = 
hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, ++ operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) ++ return false; ++ hlsl_block_add_instr(params->instrs, expr); ++ ++ return true; ++} ++ + static const struct intrinsic_function + { + const char *name; +@@ -4348,12 +4874,14 @@ intrinsic_functions[] = + { + /* Note: these entries should be kept in alphabetical order. */ + {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, ++ {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, + {"abs", 1, true, intrinsic_abs}, + {"acos", 1, true, intrinsic_acos}, + {"all", 1, true, intrinsic_all}, + {"any", 1, true, intrinsic_any}, + {"asfloat", 1, true, intrinsic_asfloat}, + {"asin", 1, true, intrinsic_asin}, ++ {"asint", 1, true, intrinsic_asint}, + {"asuint", -1, true, intrinsic_asuint}, + {"atan", 1, true, intrinsic_atan}, + {"atan2", 2, true, intrinsic_atan2}, +@@ -4375,6 +4903,8 @@ intrinsic_functions[] = + {"dot", 2, true, intrinsic_dot}, + {"exp", 1, true, intrinsic_exp}, + {"exp2", 1, true, intrinsic_exp2}, ++ {"f16tof32", 1, true, intrinsic_f16tof32}, ++ {"faceforward", 3, true, intrinsic_faceforward}, + {"floor", 1, true, intrinsic_floor}, + {"fmod", 2, true, intrinsic_fmod}, + {"frac", 1, true, intrinsic_frac}, +@@ -4392,6 +4922,7 @@ intrinsic_functions[] = + {"normalize", 1, true, intrinsic_normalize}, + {"pow", 2, true, intrinsic_pow}, + {"radians", 1, true, intrinsic_radians}, ++ {"rcp", 1, true, intrinsic_rcp}, + {"reflect", 2, true, intrinsic_reflect}, + {"refract", 3, true, intrinsic_refract}, + {"round", 1, true, intrinsic_round}, +@@ -4406,12 +4937,17 @@ intrinsic_functions[] = + {"tan", 1, true, intrinsic_tan}, + {"tanh", 1, true, intrinsic_tanh}, + {"tex1D", -1, false, intrinsic_tex1D}, ++ {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, + {"tex2D", -1, false, intrinsic_tex2D}, ++ {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, ++ {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, + {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, + {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, + {"tex3D", -1, false, intrinsic_tex3D}, ++ {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, + {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, + {"texCUBE", -1, false, intrinsic_texCUBE}, ++ {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, + {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, + {"transpose", 1, true, intrinsic_transpose}, + {"trunc", 1, true, intrinsic_trunc}, +@@ -4599,7 +5135,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, + common_type = first->data_type; + } + +- assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + args[0] = cond; + args[1] = first; +@@ -5481,6 +6017,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_BREAK + %token KW_BUFFER + %token KW_CASE ++%token KW_CONSTANTBUFFER + %token KW_CBUFFER + %token KW_CENTROID + %token KW_COLUMN_MAJOR +@@ -5566,6 +6103,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_TEXTURECUBEARRAY + %token KW_TRUE + %token KW_TYPEDEF ++%token KW_UNSIGNED + %token KW_UNIFORM + %token KW_VECTOR + %token KW_VERTEXSHADER +@@ -5670,6 +6208,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + + %type if_body + ++%type array ++ + %type var_modifiers + + %type any_identifier +@@ -5678,6 +6218,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %type name_opt + + 
%type parameter ++%type parameter_decl + + %type param_list + %type parameters +@@ -5717,8 +6258,7 @@ hlsl_prog: + | hlsl_prog buffer_declaration buffer_body + | hlsl_prog declaration_statement + { +- if (!list_empty(&$2->instrs)) +- hlsl_fixme(ctx, &@2, "Uniform initializer."); ++ hlsl_block_add_block(&ctx->static_initializers, $2); + destroy_block($2); + } + | hlsl_prog preproc_directive +@@ -5742,19 +6282,31 @@ pass: + + annotations_list: + variables_def_typed ';' ++ { ++ struct hlsl_block *block; ++ ++ block = initialize_vars(ctx, $1); ++ destroy_block(block); ++ } + | annotations_list variables_def_typed ';' ++ { ++ struct hlsl_block *block; ++ ++ block = initialize_vars(ctx, $2); ++ destroy_block(block); ++ } + + annotations_opt: + %empty + { + $$ = NULL; + } +- | '<' scope_start '>' ++ | '<' annotations_scope_start '>' + { + hlsl_pop_scope(ctx); + $$ = NULL; + } +- | '<' scope_start annotations_list '>' ++ | '<' annotations_scope_start annotations_list '>' + { + struct hlsl_scope *scope = ctx->cur_scope; + +@@ -6083,7 +6635,7 @@ func_declaration: + + if (!$1.first) + { +- assert(decl->parameters.count == $1.parameters.count); ++ VKD3D_ASSERT(decl->parameters.count == $1.parameters.count); + + for (i = 0; i < $1.parameters.count; ++i) + { +@@ -6198,7 +6750,7 @@ func_prototype_no_attrs: + * brittle and ugly. + */ + +- assert($5.count == params->count); ++ VKD3D_ASSERT($5.count == params->count); + for (i = 0; i < params->count; ++i) + { + struct hlsl_ir_var *orig_param = params->vars[i]; +@@ -6282,6 +6834,13 @@ switch_scope_start: + ctx->cur_scope->_switch = true; + } + ++annotations_scope_start: ++ %empty ++ { ++ hlsl_push_scope(ctx); ++ ctx->cur_scope->annotations = true; ++ } ++ + var_identifier: + VAR_IDENTIFIER + | NEW_IDENTIFIER +@@ -6315,6 +6874,9 @@ semantic: + { + char *p; + ++ if (!($$.raw_name = hlsl_strdup(ctx, $2))) ++ YYABORT; ++ + for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) + ; + $$.name = $2; +@@ -6330,22 +6892,34 @@ register_reservation: + ':' KW_REGISTER '(' any_identifier ')' + { + memset(&$$, 0, sizeof($$)); +- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ if (!parse_reservation_index(ctx, $4, 0, &$$)) ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ ++ vkd3d_free($4); ++ } ++ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' ++ { ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) ++ { + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); ++ } + + vkd3d_free($4); ++ vkd3d_free($6); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' + { + memset(&$$, 0, sizeof($$)); +- if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ if (parse_reservation_index(ctx, $6, 0, &$$)) + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + } + else if (parse_reservation_space($6, &$$.reg_space)) + { +- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) ++ if (!parse_reservation_index(ctx, $4, 0, &$$)) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + } +@@ -6358,12 +6932,45 @@ register_reservation: + vkd3d_free($4); + vkd3d_free($6); + } ++ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' ++ { ++ memset(&$$, 0, sizeof($$)); ++ ++ if (!parse_reservation_space($9, &$$.reg_space)) ++ hlsl_error(ctx, &@9, 
VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register space reservation '%s'.", $9); ++ ++ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) ++ { ++ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $4); ++ } ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ vkd3d_free($9); ++ } ++ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) ++ { ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $6); ++ } ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ vkd3d_free($8); ++ } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + + memset(&$$, 0, sizeof($$)); +- if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) ++ if (!parse_reservation_index(ctx, $6, 0, &$$)) + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + +@@ -6375,6 +6982,26 @@ register_reservation: + vkd3d_free($6); + vkd3d_free($8); + } ++ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' ++ { ++ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); ++ ++ memset(&$$, 0, sizeof($$)); ++ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) ++ { ++ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register reservation '%s'.", $6); ++ } ++ ++ if (!parse_reservation_space($11, &$$.reg_space)) ++ hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Invalid register space reservation '%s'.", $11); ++ ++ vkd3d_free($4); ++ vkd3d_free($6); ++ vkd3d_free($8); ++ vkd3d_free($11); ++ } + + packoffset_reservation: + ':' KW_PACKOFFSET '(' any_identifier ')' +@@ -6427,6 +7054,14 @@ param_list: + } + + parameter: ++ parameter_decl ++ | parameter_decl '=' complex_initializer ++ { ++ $$ = $1; ++ $$.initializer = $3; ++ } ++ ++parameter_decl: + var_modifiers type_no_void any_identifier arrays colon_attribute + { + uint32_t modifiers = $1; +@@ -6449,11 +7084,18 @@ parameter: + } + type = hlsl_new_array_type(ctx, type, $4.sizes[i]); + } ++ vkd3d_free($4.sizes); ++ + $$.type = type; + ++ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) ++ hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); ++ + $$.name = $3; + $$.semantic = $5.semantic; + $$.reg_reservation = $5.reg_reservation; ++ ++ memset(&$$.initializer, 0, sizeof($$.initializer)); + } + + texture_type: +@@ -6696,6 +7338,10 @@ type_no_void: + validate_uav_type(ctx, $1, $3, &@3); + $$ = hlsl_new_uav_type(ctx, $1, $3, true); + } ++ | KW_STRING ++ { ++ $$ = ctx->builtin_types.string; ++ } + | TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); +@@ -6713,6 +7359,26 @@ type_no_void: + } + vkd3d_free($1); + } ++ | KW_UNSIGNED TYPE_IDENTIFIER ++ { ++ struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); ++ ++ if (hlsl_is_numeric_type(type) && type->e.numeric.type == HLSL_TYPE_INT) ++ { ++ if (!(type = hlsl_type_clone(ctx, type, 0, 0))) ++ YYABORT; ++ vkd3d_free((void *)type->name); ++ type->name = NULL; ++ type->e.numeric.type = HLSL_TYPE_UINT; ++ } 
++ else ++ { ++ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "The 'unsigned' keyword can't be used with type %s.", $2); ++ } ++ ++ $$ = type; ++ } + | KW_STRUCT TYPE_IDENTIFIER + { + $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); +@@ -6724,6 +7390,10 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); + } ++ | KW_DEPTHSTENCILSTATE ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); ++ } + | KW_DEPTHSTENCILVIEW + { + $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); +@@ -6736,6 +7406,33 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); + } ++ | KW_COMPUTESHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "ComputeShader", true, true); ++ } ++ | KW_DOMAINSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "DomainShader", true, true); ++ } ++ | KW_HULLSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "HullShader", true, true); ++ } ++ | KW_GEOMETRYSHADER ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); ++ } ++ | KW_CONSTANTBUFFER '<' type '>' ++ { ++ if ($3->class != HLSL_CLASS_STRUCT) ++ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "ConstantBuffer<...> requires user-defined structure type."); ++ $$ = hlsl_new_cb_type(ctx, $3); ++ } ++ | KW_RASTERIZERSTATE ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); ++ } + + type: + type_no_void +@@ -6840,10 +7537,10 @@ variables_def_typed: + { + struct parse_variable_def *head_def; + +- assert(!list_empty($1)); ++ VKD3D_ASSERT(!list_empty($1)); + head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); + +- assert(head_def->basic_type); ++ VKD3D_ASSERT(head_def->basic_type); + $3->basic_type = head_def->basic_type; + $3->modifiers = head_def->modifiers; + $3->modifiers_loc = head_def->modifiers_loc; +@@ -6855,7 +7552,7 @@ variables_def_typed: + } + + variable_decl: +- any_identifier arrays colon_attribute ++ any_identifier arrays colon_attribute annotations_opt + { + $$ = hlsl_alloc(ctx, sizeof(*$$)); + $$->loc = @1; +@@ -6863,6 +7560,7 @@ variable_decl: + $$->arrays = $2; + $$->semantic = $3.semantic; + $$->reg_reservation = $3.reg_reservation; ++ $$->annotations = $4; + } + + state_block_start: +@@ -6932,6 +7630,34 @@ state_block: + hlsl_src_from_node(&entry->args[i], $5.args[i]); + vkd3d_free($5.args); + ++ $$ = $1; ++ state_block_add_entry($$, entry); ++ } ++ | state_block any_identifier '(' func_arguments ')' ';' ++ { ++ struct hlsl_state_block_entry *entry; ++ unsigned int i; ++ ++ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) ++ YYABORT; ++ ++ entry->is_function_call = true; ++ ++ entry->name = $2; ++ entry->lhs_has_index = false; ++ entry->lhs_index = 0; ++ ++ entry->instrs = $4.instrs; ++ ++ entry->args_count = $4.args_count; ++ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) ++ YYABORT; ++ for (i = 0; i < entry->args_count; ++i) ++ hlsl_src_from_node(&entry->args[i], $4.args[i]); ++ vkd3d_free($4.args); ++ ++ hlsl_validate_state_block_entry(ctx, entry, &@4); ++ + $$ = $1; + state_block_add_entry($$, entry); + } +@@ -7020,52 +7746,43 @@ variable_def_typed: + $$->modifiers_loc = @1; + } + +-arrays: +- %empty ++array: ++ '[' ']' + { +- $$.sizes = NULL; +- $$.count = 0; ++ $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; + } +- | '[' expr ']' arrays ++ | '[' expr ']' + { +- uint32_t *new_array; +- unsigned int size; ++ $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); + +- size = 
evaluate_static_expression_as_uint(ctx, $2, &@2); +- +- destroy_block($2); +- +- $$ = $4; +- +- if (!size) ++ if (!$$) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, + "Array size is not a positive integer constant."); +- vkd3d_free($$.sizes); + YYABORT; + } + +- if (size > 65536) ++ if ($$ > 65536) + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, +- "Array size %u is not between 1 and 65536.", size); +- vkd3d_free($$.sizes); ++ "Array size %u is not between 1 and 65536.", $$); + YYABORT; + } + +- if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) +- { +- vkd3d_free($$.sizes); +- YYABORT; +- } +- $$.sizes = new_array; +- $$.sizes[$$.count++] = size; ++ destroy_block($2); ++ } ++ ++arrays: ++ %empty ++ { ++ $$.sizes = NULL; ++ $$.count = 0; + } +- | '[' ']' arrays ++ | array arrays + { + uint32_t *new_array; + +- $$ = $3; ++ $$ = $2; + + if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) + { +@@ -7074,7 +7791,7 @@ arrays: + } + + $$.sizes = new_array; +- $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; ++ $$.sizes[$$.count++] = $1; + } + + var_modifiers: +@@ -7156,6 +7873,8 @@ var_modifiers: + } + | var_identifier var_modifiers + { ++ $$ = $2; ++ + if (!strcmp($1, "precise")) + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); + else if (!strcmp($1, "single")) +@@ -7587,6 +8306,23 @@ primary_expr: + YYABORT; + } + } ++ | STRING ++ { ++ struct hlsl_ir_node *c; ++ ++ if (!(c = hlsl_new_string_constant(ctx, $1, &@1))) ++ { ++ vkd3d_free($1); ++ YYABORT; ++ } ++ vkd3d_free($1); ++ ++ if (!($$ = make_block(ctx, c))) ++ { ++ hlsl_free_instr(c); ++ YYABORT; ++ } ++ } + | VAR_IDENTIFIER + { + struct hlsl_ir_load *load; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index bdb72a1fab9..049461cdb7d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -20,6 +20,7 @@ + + #include "hlsl.h" + #include ++#include + + /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ + static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, +@@ -52,7 +53,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + + if (regset == HLSL_REGSET_NUMERIC) + { +- assert(size % 4 == 0); ++ VKD3D_ASSERT(size % 4 == 0); + size /= 4; + } + +@@ -75,7 +76,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str + + if (regset == HLSL_REGSET_NUMERIC) + { +- assert(*offset_component == 0); ++ VKD3D_ASSERT(*offset_component == 0); + *offset_component = field_offset % 4; + field_offset /= 4; + } +@@ -120,7 +121,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st + return NULL; + hlsl_block_add_instr(block, offset); + +- assert(deref->var); ++ VKD3D_ASSERT(deref->var); + type = deref->var->data_type; + + for (i = 0; i < deref->path_len; ++i) +@@ -153,8 +154,8 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der + struct hlsl_block block; + struct hlsl_type *type; + +- assert(deref->var); +- assert(!hlsl_deref_is_lowered(deref)); ++ VKD3D_ASSERT(deref->var); ++ VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); + + type = hlsl_deref_get_type(ctx, deref); + +@@ -218,6 +219,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, + uniform->is_uniform = 1; + uniform->is_param = 
temp->is_param; + uniform->buffer = temp->buffer; ++ if (temp->default_values) ++ { ++ /* Transfer default values from the temp to the uniform. */ ++ VKD3D_ASSERT(!uniform->default_values); ++ VKD3D_ASSERT(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); ++ uniform->default_values = temp->default_values; ++ temp->default_values = NULL; ++ } + + if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) + return; +@@ -312,7 +321,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir + } + } + +- if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) ++ if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) + { + vkd3d_free(new_name); + return NULL; +@@ -390,7 +399,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + } + else + { +- assert(i == 0); ++ VKD3D_ASSERT(i == 0); + + if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) + return; +@@ -523,7 +532,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s + } + else + { +- assert(i == 0); ++ VKD3D_ASSERT(i == 0); + + if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) + return; +@@ -918,7 +927,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + if (return_instr) + { + /* If we're in a loop, we should have used "break" instead. */ +- assert(!in_loop); ++ VKD3D_ASSERT(!in_loop); + + /* Iterate in reverse, to avoid use-after-free when unlinking sources from + * the "uses" list. */ +@@ -940,7 +949,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun + struct hlsl_ir_load *load; + + /* If we're in a loop, we should have used "break" instead. */ +- assert(!in_loop); ++ VKD3D_ASSERT(!in_loop); + + if (tail == &cf_instr->entry) + return has_early_return; +@@ -999,7 +1008,7 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h + struct hlsl_deref coords_deref; + struct hlsl_ir_var *coords; + +- assert(dim_count < 4); ++ VKD3D_ASSERT(dim_count < 4); + + if (!(coords = hlsl_new_synthetic_var(ctx, "coords", + hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) +@@ -1100,9 +1109,9 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_resource_load_params params = {0}; + struct hlsl_ir_node *resource_load; + +- assert(coords->data_type->class == HLSL_CLASS_VECTOR); +- assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); +- assert(coords->data_type->dimx == dim_count); ++ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ VKD3D_ASSERT(coords->data_type->dimx == dim_count); + + if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) + return false; +@@ -1132,7 +1141,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_deref row_deref; + unsigned int i; + +- assert(!hlsl_type_is_row_major(mat->data_type)); ++ VKD3D_ASSERT(!hlsl_type_is_row_major(mat->data_type)); + + if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) + return false; +@@ -1369,7 +1378,7 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co + unsigned int component_count = hlsl_type_component_count(var->data_type); + struct copy_propagation_value *value; + +- assert(component < component_count); ++ VKD3D_ASSERT(component < component_count); + value 
= copy_propagation_get_value_at_time(&var_def->traces[component], time); + + if (!value) +@@ -1402,7 +1411,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h + var_def->var = var; + + res = rb_put(&state->var_defs, var, &var_def->entry); +- assert(!res); ++ VKD3D_ASSERT(!res); + + return var_def; + } +@@ -1411,7 +1420,7 @@ static void copy_propagation_trace_record_value(struct hlsl_ctx *ctx, + struct copy_propagation_component_trace *trace, struct hlsl_ir_node *node, + unsigned int component, unsigned int time) + { +- assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); ++ VKD3D_ASSERT(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); + + if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity, + trace->record_count + 1, sizeof(trace->records[0]))) +@@ -1440,7 +1449,7 @@ static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct co + /* Don't add an invalidate record if it is already present. */ + if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time) + { +- assert(!trace->records[trace->record_count - 1].node); ++ VKD3D_ASSERT(!trace->records[trace->record_count - 1].node); + continue; + } + +@@ -1623,27 +1632,34 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + + switch (type->class) + { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_GEOMETRY_SHADER: + break; + + case HLSL_CLASS_MATRIX: + case HLSL_CLASS_ARRAY: + case HLSL_CLASS_STRUCT: ++ case HLSL_CLASS_CONSTANT_BUFFER: + /* FIXME: Actually we shouldn't even get here, but we don't split + * matrices yet. */ + return false; + +- case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: +- case HLSL_CLASS_RENDER_TARGET_VIEW: +- case HLSL_CLASS_STRING: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_VOID: + vkd3d_unreachable(); +@@ -1685,11 +1701,11 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, + + if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) + return false; +- assert(count == 1); ++ VKD3D_ASSERT(count == 1); + + if (!(value = copy_propagation_get_value(state, deref->var, start, time))) + return false; +- assert(value->component == 0); ++ VKD3D_ASSERT(value->component == 0); + + /* Only HLSL_IR_LOAD can produce an object. 
*/ + load = hlsl_ir_load(value->node); +@@ -1970,6 +1986,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc + return progress; + } + ++enum validation_result ++{ ++ DEREF_VALIDATION_OK, ++ DEREF_VALIDATION_OUT_OF_BOUNDS, ++ DEREF_VALIDATION_NOT_CONSTANT, ++}; ++ ++static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, ++ const struct hlsl_deref *deref) ++{ ++ struct hlsl_type *type = deref->var->data_type; ++ unsigned int i; ++ ++ for (i = 0; i < deref->path_len; ++i) ++ { ++ struct hlsl_ir_node *path_node = deref->path[i].node; ++ unsigned int idx = 0; ++ ++ VKD3D_ASSERT(path_node); ++ if (path_node->type != HLSL_IR_CONSTANT) ++ return DEREF_VALIDATION_NOT_CONSTANT; ++ ++ /* We should always have generated a cast to UINT. */ ++ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR ++ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); ++ ++ idx = hlsl_ir_constant(path_node)->value.u[0].u; ++ ++ switch (type->class) ++ { ++ case HLSL_CLASS_VECTOR: ++ if (idx >= type->dimx) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Vector index is out of bounds. %u/%u", idx, type->dimx); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_MATRIX: ++ if (idx >= hlsl_type_major_size(type)) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_ARRAY: ++ if (idx >= type->e.array.elements_count) ++ { ++ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); ++ return DEREF_VALIDATION_OUT_OF_BOUNDS; ++ } ++ break; ++ ++ case HLSL_CLASS_STRUCT: ++ break; ++ ++ default: ++ vkd3d_unreachable(); ++ } ++ ++ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); ++ } ++ ++ return DEREF_VALIDATION_OK; ++} ++ + static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, + const char *usage) + { +@@ -1979,7 +2065,7 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct + { + struct hlsl_ir_node *path_node = deref->path[i].node; + +- assert(path_node); ++ VKD3D_ASSERT(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + hlsl_note(ctx, &path_node->loc, VKD3D_SHADER_LOG_ERROR, + "Expression for %s within \"%s\" cannot be resolved statically.", +@@ -1987,60 +2073,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct + } + } + +-static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + void *context) + { +- unsigned int start, count; +- +- if (instr->type == HLSL_IR_RESOURCE_LOAD) ++ switch (instr->type) + { +- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); +- +- if (!load->resource.var->is_uniform) ++ case HLSL_IR_RESOURCE_LOAD: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Loaded resource must have a single uniform source."); ++ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); ++ ++ if (!load->resource.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Loaded resource must have a single uniform source."); 
++ } ++ else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Loaded resource from \"%s\" must be determinable at compile time.", ++ load->resource.var->name); ++ note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); ++ } ++ ++ if (load->sampler.var) ++ { ++ if (!load->sampler.var->is_uniform) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Resource load sampler must have a single uniform source."); ++ } ++ else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) ++ { ++ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, ++ "Resource load sampler from \"%s\" must be determinable at compile time.", ++ load->sampler.var->name); ++ note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); ++ } ++ } ++ break; + } +- else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) ++ case HLSL_IR_RESOURCE_STORE: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Loaded resource from \"%s\" must be determinable at compile time.", +- load->resource.var->name); +- note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); +- } ++ struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); + +- if (load->sampler.var) +- { +- if (!load->sampler.var->is_uniform) ++ if (!store->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Resource load sampler must have a single uniform source."); ++ "Accessed resource must have a single uniform source."); + } +- else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) ++ else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Resource load sampler from \"%s\" must be determinable at compile time.", +- load->sampler.var->name); +- note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); ++ "Accessed resource from \"%s\" must be determinable at compile time.", ++ store->resource.var->name); ++ note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); + } ++ break; + } +- } +- else if (instr->type == HLSL_IR_RESOURCE_STORE) +- { +- struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); +- +- if (!store->resource.var->is_uniform) ++ case HLSL_IR_LOAD: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Accessed resource must have a single uniform source."); ++ struct hlsl_ir_load *load = hlsl_ir_load(instr); ++ validate_component_index_range_from_deref(ctx, &load->src); ++ break; + } +- else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) ++ case HLSL_IR_STORE: + { +- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, +- "Accessed resource from \"%s\" must be determinable at compile time.", +- store->resource.var->name); +- note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); ++ struct hlsl_ir_store *store = hlsl_ir_store(instr); ++ validate_component_index_range_from_deref(ctx, &store->lhs); ++ break; + } ++ default: ++ break; + } + + return false; +@@ -2436,7 +2539,7 @@ static bool 
lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir + return false; + + deref = &hlsl_ir_load(instr)->src; +- assert(deref->var); ++ VKD3D_ASSERT(deref->var); + + if (deref->path_len == 0) + return false; +@@ -2510,7 +2613,7 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc + return false; + + deref = &hlsl_ir_store(instr)->lhs; +- assert(deref->var); ++ VKD3D_ASSERT(deref->var); + + if (deref->path_len == 0) + return false; +@@ -2531,6 +2634,124 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc + return false; + } + ++/* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant ++ * index into multiple constant loads, where the value of only one of them ends up in the resulting ++ * node. ++ * This is achieved through a synthetic variable. The non-constant index is compared for equality ++ * with every possible value it can have within the array bounds, and the ternary operator is used ++ * to update the value of the synthetic var when the equality check passes. */ ++static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, ++ struct hlsl_block *block) ++{ ++ struct hlsl_constant_value zero_value = {0}; ++ struct hlsl_ir_node *cut_index, *zero, *store; ++ unsigned int i, i_cut, element_count; ++ const struct hlsl_deref *deref; ++ struct hlsl_type *cut_type; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_var *var; ++ bool row_major; ++ ++ if (instr->type != HLSL_IR_LOAD) ++ return false; ++ load = hlsl_ir_load(instr); ++ deref = &load->src; ++ ++ if (deref->path_len == 0) ++ return false; ++ ++ for (i = deref->path_len - 1; ; --i) ++ { ++ if (deref->path[i].node->type != HLSL_IR_CONSTANT) ++ { ++ i_cut = i; ++ break; ++ } ++ ++ if (i == 0) ++ return false; ++ } ++ ++ cut_index = deref->path[i_cut].node; ++ cut_type = deref->var->data_type; ++ for (i = 0; i < i_cut; ++i) ++ cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node); ++ ++ row_major = hlsl_type_is_row_major(cut_type); ++ VKD3D_ASSERT(cut_type->class == HLSL_CLASS_ARRAY || row_major); ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, row_major ? "row_major-load" : "array-load", instr->data_type, &instr->loc))) ++ return false; ++ ++ if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, zero); ++ ++ if (!(store = hlsl_new_simple_store(ctx, var, zero))) ++ return false; ++ hlsl_block_add_instr(block, store); ++ ++ TRACE("Lowering non-constant %s load on variable '%s'.\n", row_major ? 
"row_major" : "array", deref->var->name); ++ ++ element_count = hlsl_type_element_count(cut_type); ++ for (i = 0; i < element_count; ++i) ++ { ++ struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); ++ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; ++ struct hlsl_ir_node *const_i, *equals, *ternary, *var_store; ++ struct hlsl_ir_load *var_load, *specific_load; ++ struct hlsl_deref deref_copy = {0}; ++ ++ if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc))) ++ return false; ++ hlsl_block_add_instr(block, const_i); ++ ++ operands[0] = cut_index; ++ operands[1] = const_i; ++ if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc))) ++ return false; ++ hlsl_block_add_instr(block, equals); ++ ++ if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) ++ return false; ++ hlsl_block_add_instr(block, equals); ++ ++ if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc))) ++ return false; ++ hlsl_block_add_instr(block, &var_load->node); ++ ++ if (!hlsl_copy_deref(ctx, &deref_copy, deref)) ++ return false; ++ hlsl_src_remove(&deref_copy.path[i_cut]); ++ hlsl_src_from_node(&deref_copy.path[i_cut], const_i); ++ ++ if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc))) ++ { ++ hlsl_cleanup_deref(&deref_copy); ++ return false; ++ } ++ hlsl_block_add_instr(block, &specific_load->node); ++ ++ hlsl_cleanup_deref(&deref_copy); ++ ++ operands[0] = equals; ++ operands[1] = &specific_load->node; ++ operands[2] = &var_load->node; ++ if (!(ternary = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc))) ++ return false; ++ hlsl_block_add_instr(block, ternary); ++ ++ if (!(var_store = hlsl_new_simple_store(ctx, var, ternary))) ++ return false; ++ hlsl_block_add_instr(block, var_store); ++ } ++ ++ if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, &load->node); ++ ++ return true; ++} + /* Lower combined samples and sampler variables to synthesized separated textures and samplers. + * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ + static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +@@ -2554,11 +2775,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + case HLSL_RESOURCE_RESINFO: + case HLSL_RESOURCE_SAMPLE_CMP: + case HLSL_RESOURCE_SAMPLE_CMP_LZ: +- case HLSL_RESOURCE_SAMPLE_GRAD: + case HLSL_RESOURCE_SAMPLE_INFO: + return false; + + case HLSL_RESOURCE_SAMPLE: ++ case HLSL_RESOURCE_SAMPLE_GRAD: + case HLSL_RESOURCE_SAMPLE_LOD: + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_PROJ: +@@ -2573,7 +2794,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + return false; + } + +- assert(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); ++ VKD3D_ASSERT(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); + + if (!(name = hlsl_get_string_buffer(ctx))) + return false; +@@ -2590,7 +2811,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + struct hlsl_type *arr_type = load->resource.var->data_type; + for (i = 0; i < load->resource.path_len; ++i) + { +- assert(arr_type->class == HLSL_CLASS_ARRAY); ++ VKD3D_ASSERT(arr_type->class == HLSL_CLASS_ARRAY); + texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); + arr_type = arr_type->e.array.type; + } +@@ -2619,8 +2840,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in + + hlsl_copy_deref(ctx, &load->sampler, &load->resource); + load->resource.var = var; +- assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); +- assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); ++ VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); ++ VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); + + return true; + } +@@ -2918,6 +3139,108 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct + return true; + } + ++/* Lower SIN/COS to SINCOS for SM1. */ ++static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi; ++ struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value; ++ struct hlsl_ir_node *mad, *frc, *reduced; ++ struct hlsl_type *type; ++ struct hlsl_ir_expr *expr; ++ enum hlsl_ir_expr_op op; ++ struct hlsl_ir_node *sincos; ++ int i; ++ ++ if (instr->type != HLSL_IR_EXPR) ++ return false; ++ expr = hlsl_ir_expr(instr); ++ ++ if (expr->op == HLSL_OP1_SIN) ++ op = HLSL_OP1_SIN_REDUCED; ++ else if (expr->op == HLSL_OP1_COS) ++ op = HLSL_OP1_COS_REDUCED; ++ else ++ return false; ++ ++ arg = expr->operands[0].node; ++ type = arg->data_type; ++ ++ /* Reduce the range of the input angles to [-pi, pi]. 
*/ ++ for (i = 0; i < type->dimx; ++i) ++ { ++ half_value.u[i].f = 0.5; ++ two_pi_value.u[i].f = 2.0 * M_PI; ++ reciprocal_two_pi_value.u[i].f = 1.0 / (2.0 * M_PI); ++ neg_pi_value.u[i].f = -M_PI; ++ } ++ ++ if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc)) ++ || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc)) ++ || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc)) ++ || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, half); ++ hlsl_block_add_instr(block, two_pi); ++ hlsl_block_add_instr(block, reciprocal_two_pi); ++ hlsl_block_add_instr(block, neg_pi); ++ ++ if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half))) ++ return false; ++ hlsl_block_add_instr(block, mad); ++ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mad, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, frc); ++ if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi))) ++ return false; ++ hlsl_block_add_instr(block, reduced); ++ ++ if (type->dimx == 1) ++ { ++ if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, sincos); ++ } ++ else ++ { ++ struct hlsl_ir_node *comps[4] = {0}; ++ struct hlsl_ir_var *var; ++ struct hlsl_deref var_deref; ++ struct hlsl_ir_load *var_load; ++ ++ for (i = 0; i < type->dimx; ++i) ++ { ++ uint32_t s = hlsl_swizzle_from_writemask(1 << i); ++ ++ if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, comps[i]); ++ } ++ ++ if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) ++ return false; ++ hlsl_init_simple_deref_from_var(&var_deref, var); ++ ++ for (i = 0; i < type->dimx; ++i) ++ { ++ struct hlsl_block store_block; ++ ++ if (!(sincos = hlsl_new_unary_expr(ctx, op, comps[i], &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, sincos); ++ ++ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, sincos)) ++ return false; ++ hlsl_block_add_block(block, &store_block); ++ } ++ ++ if (!(var_load = hlsl_new_load_index(ctx, &var_deref, NULL, &instr->loc))) ++ return false; ++ hlsl_block_add_instr(block, &var_load->node); ++ } ++ ++ return true; ++} ++ + static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; +@@ -2936,7 +3259,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + + /* If this is happens, it means we failed to cast the argument to boolean somewhere. 
*/ +- assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) + return false; +@@ -2992,7 +3315,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + return false; + } + +- assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + + type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, + instr->data_type->dimx, instr->data_type->dimy); +@@ -3290,7 +3613,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr + return false; + + /* Narrowing casts should have already been lowered. */ +- assert(type->dimx == arg_type->dimx); ++ VKD3D_ASSERT(type->dimx == arg_type->dimx); + + zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); + if (!zero) +@@ -3312,7 +3635,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_ir_node *cond; + +- assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); ++ VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + + if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) + { +@@ -3511,7 +3834,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru + { + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; +- assert(arg1->data_type->dimx == arg2->data_type->dimx); ++ VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); + dimx = arg1->data_type->dimx; + is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; + +@@ -3729,6 +4052,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + case HLSL_IR_INDEX: + case HLSL_IR_LOAD: + case HLSL_IR_RESOURCE_LOAD: ++ case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: + if (list_empty(&instr->uses)) + { +@@ -3786,8 +4110,8 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, + if (!deref->rel_offset.node) + return false; + +- assert(deref->var); +- assert(deref->rel_offset.node->type != HLSL_IR_CONSTANT); ++ VKD3D_ASSERT(deref->var); ++ VKD3D_ASSERT(deref->rel_offset.node->type != HLSL_IR_CONSTANT); + deref->var->indexable = true; + + return true; +@@ -3815,15 +4139,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { ++ const struct hlsl_reg_reservation *reservation = &var->reg_reservation; + unsigned int r; + +- if (var->reg_reservation.reg_type) ++ if (reservation->reg_type) + { + for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) + { + if (var->regs[r].allocation_size > 0) + { +- if (var->reg_reservation.reg_type != get_regset_name(r)) ++ if (reservation->reg_type != get_regset_name(r)) + { + struct vkd3d_string_buffer *type_string; + +@@ -3839,10 +4164,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) + else + { + var->regs[r].allocated = true; +- var->regs[r].id = var->reg_reservation.reg_index; +- TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, +- var->reg_reservation.reg_index, var->reg_reservation.reg_type, +- var->reg_reservation.reg_index + var->regs[r].allocation_size); ++ var->regs[r].space = reservation->reg_space; ++ var->regs[r].index = reservation->reg_index; + } + } + } +@@ -4010,6 +4333,7 @@ static void 
compute_liveness_recurse(struct hlsl_block *block, unsigned int loop + break; + } + case HLSL_IR_CONSTANT: ++ case HLSL_IR_STRING_CONSTANT: + break; + } + } +@@ -4111,7 +4435,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + unsigned int writemask; + uint32_t reg_idx; + +- assert(component_count <= reg_size); ++ VKD3D_ASSERT(component_count <= reg_size); + + for (reg_idx = 0;; ++reg_idx) + { +@@ -4133,6 +4457,30 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a + return ret; + } + ++/* Allocate a register with writemask, while reserving reg_writemask. */ ++static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, ++ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) ++{ ++ struct hlsl_reg ret = {0}; ++ uint32_t reg_idx; ++ ++ VKD3D_ASSERT((reg_writemask & writemask) == writemask); ++ ++ for (reg_idx = 0;; ++reg_idx) ++ { ++ if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) ++ break; ++ } ++ ++ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); ++ ++ ret.id = reg_idx; ++ ret.allocation_size = 1; ++ ret.writemask = writemask; ++ ret.allocated = true; ++ return ret; ++} ++ + static bool is_range_available(const struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) + { +@@ -4181,8 +4529,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, + { + unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + ++ /* FIXME: We could potentially pack structs or arrays more efficiently... */ ++ + if (type->class <= HLSL_CLASS_VECTOR) +- return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); ++ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); + else + return allocate_range(ctx, allocator, first_write, last_read, reg_size); + } +@@ -4224,7 +4574,7 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls + { + enum hlsl_sampler_dim dim; + +- assert(!load->sampler.var); ++ VKD3D_ASSERT(!load->sampler.var); + + dim = var->objects_usage[regset][index].sampler_dim; + if (dim != load->sampling_dim) +@@ -4334,6 +4684,44 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) + } + } + ++static void allocate_instr_temp_register(struct hlsl_ctx *ctx, ++ struct hlsl_ir_node *instr, struct register_allocator *allocator) ++{ ++ unsigned int reg_writemask = 0, dst_writemask = 0; ++ ++ if (instr->reg.allocated || !instr->last_read) ++ return; ++ ++ if (instr->type == HLSL_IR_EXPR) ++ { ++ switch (hlsl_ir_expr(instr)->op) ++ { ++ case HLSL_OP1_COS_REDUCED: ++ dst_writemask = VKD3DSP_WRITEMASK_0; ++ reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_0; ++ break; ++ ++ case HLSL_OP1_SIN_REDUCED: ++ dst_writemask = VKD3DSP_WRITEMASK_1; ++ reg_writemask = ctx->profile->major_version < 3 ? 
(1 << 3) - 1 : VKD3DSP_WRITEMASK_1; ++ break; ++ ++ default: ++ break; ++ } ++ } ++ ++ if (reg_writemask) ++ instr->reg = allocate_register_with_masks(ctx, allocator, ++ instr->index, instr->last_read, reg_writemask, dst_writemask); ++ else ++ instr->reg = allocate_numeric_registers_for_type(ctx, allocator, ++ instr->index, instr->last_read, instr->data_type); ++ ++ TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, ++ debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); ++} ++ + static void allocate_variable_temp_register(struct hlsl_ctx *ctx, + struct hlsl_ir_var *var, struct register_allocator *allocator) + { +@@ -4373,13 +4761,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, + if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) + continue; + +- if (!instr->reg.allocated && instr->last_read) +- { +- instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, +- instr->data_type); +- TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, +- debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); +- } ++ allocate_instr_temp_register(ctx, instr, allocator); + + switch (instr->type) + { +@@ -4474,9 +4856,9 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, + constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); + +- assert(hlsl_is_numeric_type(type)); +- assert(type->dimy == 1); +- assert(constant->reg.writemask); ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); ++ VKD3D_ASSERT(type->dimy == 1); ++ VKD3D_ASSERT(constant->reg.writemask); + + for (x = 0, i = 0; x < 4; ++x) + { +@@ -4587,8 +4969,46 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) + list_move_tail(&ctx->extern_vars, &sorted); + } + ++/* In SM2, 'sincos' expects specific constants as src1 and src2 arguments. ++ * These have to be referenced directly, i.e. as 'c' not 'r'. 
*/ ++static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct register_allocator *allocator) ++{ ++ const struct hlsl_ir_node *instr; ++ struct hlsl_type *type; ++ ++ if (ctx->profile->major_version >= 3) ++ return; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED ++ || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED)) ++ { ++ type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); ++ ++ ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); ++ TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); ++ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); ++ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); ++ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); ++ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); ++ ++ ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); ++ TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); ++ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); ++ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); ++ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); ++ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); ++ ++ return; ++ } ++ } ++} ++ + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) + { ++ struct register_allocator allocator_used = {0}; + struct register_allocator allocator = {0}; + struct hlsl_ir_var *var; + +@@ -4597,6 +5017,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; + + if (!var->is_uniform || reg_size == 0) + continue; +@@ -4606,15 +5027,18 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + unsigned int reg_idx = var->reg_reservation.reg_index; + unsigned int i; + +- assert(reg_size % 4 == 0); ++ VKD3D_ASSERT(reg_size % 4 == 0); + for (i = 0; i < reg_size / 4; ++i) + { +- if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) ++ if (i < bind_count) + { +- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, +- "Overlapping register() reservations on 'c%u'.", reg_idx + i); ++ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Overlapping register() reservations on 'c%u'.", reg_idx + i); ++ } ++ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); + } +- + record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); + } + +@@ -4627,6 +5051,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + } + } + ++ vkd3d_free(allocator_used.allocations); ++ + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; +@@ -4644,6 +5070,8 @@ static void 
allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi + + allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + ++ allocate_sincos_const_registers(ctx, &entry_func->body, &allocator); ++ + vkd3d_free(allocator.allocations); + } + +@@ -4693,11 +5121,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + uint32_t reg; + bool builtin; + +- assert(var->semantic.name); ++ VKD3D_ASSERT(var->semantic.name); + + if (ctx->profile->major_version < 4) + { +- D3DSHADER_PARAM_REGISTER_TYPE sm1_type; ++ struct vkd3d_shader_version version; + D3DDECLUSAGE usage; + uint32_t usage_idx; + +@@ -4705,8 +5133,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + return; + +- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, ®); +- if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ builtin = hlsl_sm1_register_from_semantic(&version, ++ var->semantic.name, var->semantic.index, output, &type, ®); ++ if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -4715,7 +5147,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; +- type = (enum vkd3d_shader_register_type)sm1_type; + } + else + { +@@ -4762,13 +5193,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) + } + } + +-static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) ++static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) + { + const struct hlsl_buffer *buffer; + + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) + { +- if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) ++ if (buffer->reservation.reg_type == 'b' ++ && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) + return buffer; + } + return NULL; +@@ -4783,6 +5215,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va + if (register_reservation) + { + var->buffer_offset = 4 * var->reg_reservation.reg_index; ++ var->has_explicit_bind_point = 1; + } + else + { +@@ -4815,6 +5248,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va + } + } + var->buffer_offset = var->reg_reservation.offset_index; ++ var->has_explicit_bind_point = 1; + } + else + { +@@ -4913,11 +5347,19 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) + } + } + ++static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) ++{ ++ if (hlsl_version_ge(ctx, 5, 1)) ++ return UINT_MAX; ++ ++ return 13; ++} ++ + static void allocate_buffers(struct hlsl_ctx *ctx) + { + struct hlsl_buffer *buffer; ++ uint32_t index = 0, id = 0; + struct hlsl_ir_var *var; +- uint32_t index = 0; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +@@ -4938,32 +5380,59 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + + if (buffer->type == 
HLSL_BUFFER_CONSTANT) + { +- if (buffer->reservation.reg_type == 'b') ++ const struct hlsl_reg_reservation *reservation = &buffer->reservation; ++ ++ if (reservation->reg_type == 'b') + { +- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); ++ const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, ++ reservation->reg_space, reservation->reg_index); ++ unsigned int max_index = get_max_cbuffer_reg_index(ctx); ++ ++ if (buffer->reservation.reg_index > max_index) ++ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Buffer reservation cb%u exceeds target's maximum (cb%u).", ++ buffer->reservation.reg_index, max_index); + + if (reserved_buffer && reserved_buffer != buffer) + { + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +- "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); ++ "Multiple buffers bound to space %u, index %u.", ++ reservation->reg_space, reservation->reg_index); + hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, +- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); ++ "Buffer %s is already bound to space %u, index %u.", ++ reserved_buffer->name, reservation->reg_space, reservation->reg_index); + } + +- buffer->reg.id = buffer->reservation.reg_index; ++ buffer->reg.space = reservation->reg_space; ++ buffer->reg.index = reservation->reg_index; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ buffer->reg.id = id++; ++ else ++ buffer->reg.id = buffer->reg.index; + buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; +- TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); ++ TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", ++ buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); + } +- else if (!buffer->reservation.reg_type) ++ else if (!reservation->reg_type) + { +- while (get_reserved_buffer(ctx, index)) ++ unsigned int max_index = get_max_cbuffer_reg_index(ctx); ++ while (get_reserved_buffer(ctx, 0, index)) + ++index; + +- buffer->reg.id = index; ++ if (index > max_index) ++ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, ++ "Too many buffers allocated, target's maximum is %u.", max_index); ++ ++ buffer->reg.space = 0; ++ buffer->reg.index = index; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ buffer->reg.id = id++; ++ else ++ buffer->reg.id = buffer->reg.index; + buffer->reg.allocation_size = 1; + buffer->reg.allocated = true; +- TRACE("Allocated %s to cb%u.\n", buffer->name, index); ++ TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); + ++index; + } + else +@@ -4980,7 +5449,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) + } + + static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, +- uint32_t index, bool allocated_only) ++ uint32_t space, uint32_t index, bool allocated_only) + { + const struct hlsl_ir_var *var; + unsigned int start, count; +@@ -4995,12 +5464,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + start = var->reg_reservation.reg_index; + count = var->data_type->reg_size[regset]; + ++ if (var->reg_reservation.reg_space != space) ++ continue; ++ + if (!var->regs[regset].allocated && allocated_only) + continue; + } + else if (var->regs[regset].allocated) + { +- start = var->regs[regset].id; ++ if (var->regs[regset].space != space) ++ continue; ++ ++ start = 
var->regs[regset].index; + count = var->regs[regset].allocation_size; + } + else +@@ -5017,8 +5492,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum + static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + { + char regset_name = get_regset_name(regset); ++ uint32_t min_index = 0, id = 0; + struct hlsl_ir_var *var; +- uint32_t min_index = 0; + + if (regset == HLSL_REGSET_UAVS) + { +@@ -5041,35 +5516,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + if (var->regs[regset].allocated) + { + const struct hlsl_ir_var *reserved_object, *last_reported = NULL; +- unsigned int index, i; ++ unsigned int i; + +- if (var->regs[regset].id < min_index) ++ if (var->regs[regset].index < min_index) + { +- assert(regset == HLSL_REGSET_UAVS); ++ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, + "UAV index (%u) must be higher than the maximum render target index (%u).", +- var->regs[regset].id, min_index - 1); ++ var->regs[regset].index, min_index - 1); + continue; + } + + for (i = 0; i < count; ++i) + { +- index = var->regs[regset].id + i; ++ unsigned int space = var->regs[regset].space; ++ unsigned int index = var->regs[regset].index + i; + + /* get_allocated_object() may return "var" itself, but we + * actually want that, otherwise we'll end up reporting the + * same conflict between the same two variables twice. */ +- reserved_object = get_allocated_object(ctx, regset, index, true); ++ reserved_object = get_allocated_object(ctx, regset, space, index, true); + if (reserved_object && reserved_object != var && reserved_object != last_reported) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, +- "Multiple variables bound to %c%u.", regset_name, index); ++ "Multiple variables bound to space %u, %c%u.", regset_name, space, index); + hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, +- "Variable '%s' is already bound to %c%u.", reserved_object->name, +- regset_name, index); ++ "Variable '%s' is already bound to space %u, %c%u.", ++ reserved_object->name, regset_name, space, index); + last_reported = reserved_object; + } + } ++ ++ if (hlsl_version_ge(ctx, 5, 1)) ++ var->regs[regset].id = id++; ++ else ++ var->regs[regset].id = var->regs[regset].index; ++ TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", ++ var->name, var->regs[regset].space, regset_name, var->regs[regset].index, ++ regset_name, var->regs[regset].index + count, var->regs[regset].id); + } + else + { +@@ -5078,7 +5562,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + + while (available < count) + { +- if (get_allocated_object(ctx, regset, index, false)) ++ if (get_allocated_object(ctx, regset, 0, index, false)) + available = 0; + else + ++available; +@@ -5086,10 +5570,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) + } + index -= count; + +- var->regs[regset].id = index; ++ var->regs[regset].space = 0; ++ var->regs[regset].index = index; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ var->regs[regset].id = id++; ++ else ++ var->regs[regset].id = var->regs[regset].index; + var->regs[regset].allocated = true; +- TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, +- index + count); ++ TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, ++ regset_name, index, regset_name, index + count, 
var->regs[regset].id); + ++index; + } + } +@@ -5109,12 +5598,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + +- assert(path_node); ++ VKD3D_ASSERT(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return false; + + /* We should always have generated a cast to UINT. */ +- assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; +@@ -5123,21 +5612,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + { + case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Vector index is out of bounds. %u/%u", idx, type->dimx); + return false; +- } + *start += idx; + break; + + case HLSL_CLASS_MATRIX: + if (idx >= hlsl_type_major_size(type)) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); + return false; +- } + if (hlsl_type_is_row_major(type)) + *start += idx * type->dimx; + else +@@ -5146,11 +5627,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl + + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) +- { +- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, +- "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); + return false; +- } + *start += idx * hlsl_type_component_count(type->e.array.type); + break; + +@@ -5186,11 +5663,11 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + +- assert(path_node); ++ VKD3D_ASSERT(path_node); + if (path_node->type == HLSL_IR_CONSTANT) + { + /* We should always have generated a cast to UINT. */ +- assert(path_node->data_type->class == HLSL_CLASS_SCALAR ++ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; +@@ -5240,8 +5717,8 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + +- assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); +- assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); ++ VKD3D_ASSERT(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); ++ VKD3D_ASSERT(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); + return index_is_constant; + } + +@@ -5256,16 +5733,17 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref + if (offset_node) + { + /* We should always have generated a cast to UINT. 
*/ +- assert(offset_node->data_type->class == HLSL_CLASS_SCALAR ++ VKD3D_ASSERT(offset_node->data_type->class == HLSL_CLASS_SCALAR + && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); +- assert(offset_node->type != HLSL_IR_CONSTANT); ++ VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT); + return false; + } + + size = deref->var->data_type->reg_size[regset]; + if (*offset >= size) + { +- hlsl_error(ctx, &offset_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, ++ /* FIXME: Report a more specific location for the constant deref. */ ++ hlsl_error(ctx, &deref->var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Dereference is out of bounds. %u/%u", *offset, size); + return false; + } +@@ -5280,8 +5758,9 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl + if (hlsl_offset_from_deref(ctx, deref, &offset)) + return offset; + +- hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", +- hlsl_node_type_to_string(deref->rel_offset.node->type)); ++ if (deref->rel_offset.node) ++ hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", ++ hlsl_node_type_to_string(deref->rel_offset.node->type)); + + return 0; + } +@@ -5292,9 +5771,10 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere + struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); + +- assert(deref->data_type); +- assert(hlsl_is_numeric_type(deref->data_type)); ++ VKD3D_ASSERT(deref->data_type); ++ VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); + ++ ret.index += offset / 4; + ret.id += offset / 4; + + ret.writemask = 0xf & (0xf << (offset % 4)); +@@ -5446,6 +5926,414 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) + } while (progress); + } + ++static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, ++ struct vsir_program *program, bool output, struct hlsl_ir_var *var) ++{ ++ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; ++ enum vkd3d_shader_register_type type; ++ struct shader_signature *signature; ++ struct signature_element *element; ++ unsigned int register_index, mask; ++ ++ if ((!output && !var->last_read) || (output && !var->first_write)) ++ return; ++ ++ if (output) ++ signature = &program->output_signature; ++ else ++ signature = &program->input_signature; ++ ++ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, ++ signature->element_count + 1, sizeof(*signature->elements))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element = &signature->elements[signature->element_count++]; ++ ++ if (!hlsl_sm1_register_from_semantic(&program->shader_version, ++ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) ++ { ++ unsigned int usage, usage_idx; ++ bool ret; ++ ++ register_index = var->regs[HLSL_REGSET_NUMERIC].id; ++ ++ ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); ++ VKD3D_ASSERT(ret); ++ /* With the exception of vertex POSITION output, none of these are ++ * system values. Pixel POSITION input is not equivalent to ++ * SV_Position; the closer equivalent is VPOS, which is not declared ++ * as a semantic. 
*/ ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX ++ && output && usage == VKD3D_DECL_USAGE_POSITION) ++ sysval = VKD3D_SHADER_SV_POSITION; ++ } ++ mask = (1 << var->data_type->dimx) - 1; ++ ++ memset(element, 0, sizeof(*element)); ++ if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) ++ { ++ --signature->element_count; ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ element->semantic_index = var->semantic.index; ++ element->sysval_semantic = sysval; ++ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ element->register_index = register_index; ++ element->target_location = register_index; ++ element->register_count = 1; ++ element->mask = mask; ++ element->used_mask = mask; ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) ++ element->interpolation_mode = VKD3DSIM_LINEAR; ++} ++ ++static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) ++{ ++ struct hlsl_ir_var *var; ++ ++ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) ++ { ++ if (var->is_input_semantic) ++ sm1_generate_vsir_signature_entry(ctx, program, false, var); ++ if (var->is_output_semantic) ++ sm1_generate_vsir_signature_entry(ctx, program, true, var); ++ } ++} ++ ++/* OBJECTIVE: Translate all the information from ctx and entry_func to the ++ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() ++ * without relying on ctx and entry_func. */ ++static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, ++ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) ++{ ++ struct vkd3d_shader_version version = {0}; ++ struct vkd3d_bytecode_buffer buffer = {0}; ++ ++ version.major = ctx->profile->major_version; ++ version.minor = ctx->profile->minor_version; ++ version.type = ctx->profile->type; ++ if (!vsir_program_init(program, NULL, &version, 0)) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ write_sm1_uniforms(ctx, &buffer); ++ if (buffer.status) ++ { ++ vkd3d_free(buffer.data); ++ ctx->result = buffer.status; ++ return; ++ } ++ ctab->code = buffer.data; ++ ctab->size = buffer.size; ++ ++ sm1_generate_vsir_signature(ctx, program); ++} ++ ++static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, ++ struct hlsl_block **found_block) ++{ ++ struct hlsl_ir_node *node; ++ ++ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (node == stop_point) ++ return NULL; ++ ++ if (node->type == HLSL_IR_IF) ++ { ++ struct hlsl_ir_if *iff = hlsl_ir_if(node); ++ struct hlsl_ir_jump *jump = NULL; ++ ++ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) ++ return jump; ++ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) ++ return jump; ++ } ++ else if (node->type == HLSL_IR_JUMP) ++ { ++ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); ++ ++ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) ++ { ++ *found_block = block; ++ return jump; ++ } ++ } ++ } ++ ++ return NULL; ++} ++ ++static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) ++{ ++ /* Always use the explicit limit if it has been passed. */ ++ if (loop->unroll_limit) ++ return loop->unroll_limit; ++ ++ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. 
*/ ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ return 1024; ++ ++ /* SM4 limits implicit unrolling to 254 iterations. */ ++ if (hlsl_version_ge(ctx, 4, 0)) ++ return 254; ++ ++ /* SM<3 implicitly unrolls up to 1024 iterations. */ ++ return 1024; ++} ++ ++static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) ++{ ++ unsigned int max_iterations, i; ++ ++ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); ++ ++ for (i = 0; i < max_iterations; ++i) ++ { ++ struct hlsl_block tmp_dst, *jump_block; ++ struct hlsl_ir_jump *jump = NULL; ++ ++ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) ++ return false; ++ list_move_before(&loop->node.entry, &tmp_dst.instrs); ++ hlsl_block_cleanup(&tmp_dst); ++ ++ hlsl_run_const_passes(ctx, block); ++ ++ if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) ++ { ++ enum hlsl_ir_jump_type type = jump->type; ++ ++ if (jump_block != loop_parent) ++ { ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, ++ "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); ++ return false; ++ } ++ ++ list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); ++ hlsl_block_cleanup(&tmp_dst); ++ ++ if (type == HLSL_IR_JUMP_BREAK) ++ break; ++ } ++ } ++ ++ /* Native will not emit an error if max_iterations has been reached with an ++ * explicit limit. It also will not insert a loop if there are iterations left ++ * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ ++ if (!loop->unroll_limit && i == max_iterations) ++ { ++ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, ++ "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); ++ return false; ++ } ++ ++ list_remove(&loop->node.entry); ++ hlsl_free_instr(&loop->node); ++ ++ return true; ++} ++ ++/* ++ * loop_unrolling_find_unrollable_loop() is not the normal way to do things; ++ * normal passes simply iterate over the whole block and apply a transformation ++ * to every relevant instruction. However, loop unrolling can fail, and we want ++ * to leave the loop in its previous state in that case. That isn't a problem by ++ * itself, except that loop unrolling needs copy-prop in order to work properly, ++ * and copy-prop state at the time of the loop depends on the rest of the program ++ * up to that point. This means we need to clone the whole program, and at that ++ * point we have to search it again anyway to find the clone of the loop we were ++ * going to unroll. ++ * ++ * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop ++ * up until the loop instruction, clone just that loop, then use copyprop again ++ * with the saved state after unrolling. However, copyprop currently isn't built ++ * for that yet [notably, it still relies on indices]. Note also this still doesn't ++ * really let us use transform_ir() anyway [since we don't have a good way to say ++ * "copyprop from the beginning of the program up to the instruction we're ++ * currently processing" from the callback]; we'd have to use a dedicated ++ * recursive function instead. 
*/ ++static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, ++ struct hlsl_block **containing_block) ++{ ++ struct hlsl_ir_node *instr; ++ ++ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ switch (instr->type) ++ { ++ case HLSL_IR_LOOP: ++ { ++ struct hlsl_ir_loop *nested_loop; ++ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); ++ ++ if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) ++ return nested_loop; ++ ++ if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) ++ { ++ *containing_block = block; ++ return loop; ++ } ++ ++ break; ++ } ++ case HLSL_IR_IF: ++ { ++ struct hlsl_ir_loop *loop; ++ struct hlsl_ir_if *iff = hlsl_ir_if(instr); ++ ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) ++ return loop; ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) ++ return loop; ++ ++ break; ++ } ++ case HLSL_IR_SWITCH: ++ { ++ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); ++ struct hlsl_ir_switch_case *c; ++ struct hlsl_ir_loop *loop; ++ ++ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) ++ { ++ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) ++ return loop; ++ } ++ ++ break; ++ } ++ default: ++ break; ++ } ++ } ++ ++ return NULL; ++} ++ ++static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) ++{ ++ while (true) ++ { ++ struct hlsl_block clone, *containing_block; ++ struct hlsl_ir_loop *loop, *cloned_loop; ++ ++ if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) ++ return; ++ ++ if (!hlsl_clone_block(ctx, &clone, block)) ++ return; ++ ++ cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); ++ VKD3D_ASSERT(cloned_loop); ++ ++ if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) ++ { ++ hlsl_block_cleanup(&clone); ++ loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; ++ continue; ++ } ++ ++ hlsl_block_cleanup(block); ++ hlsl_block_init(block); ++ hlsl_block_add_block(block, &clone); ++ } ++} ++ ++static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) ++{ ++ struct hlsl_ir_node *call, *rhs, *store; ++ struct hlsl_ir_function_decl *func; ++ unsigned int component_count; ++ struct hlsl_ir_load *load; ++ struct hlsl_ir_expr *expr; ++ struct hlsl_ir_var *lhs; ++ char *body; ++ ++ static const char template[] = ++ "typedef uint%u uintX;\n" ++ "float%u soft_f16tof32(uintX x)\n" ++ "{\n" ++ " uintX mantissa = x & 0x3ff;\n" ++ " uintX high2 = mantissa >> 8;\n" ++ " uintX high2_check = high2 ? high2 : mantissa;\n" ++ " uintX high6 = high2_check >> 4;\n" ++ " uintX high6_check = high6 ? high6 : high2_check;\n" ++ "\n" ++ " uintX high8 = high6_check >> 2;\n" ++ " uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n" ++ " uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n" ++ " shift = high8 ? shift + 2 : shift;\n" ++ " shift = high8_check ? shift + 1 : shift;\n" ++ " shift = -shift + 10;\n" ++ " shift = mantissa ? shift : 11;\n" ++ " uintX subnormal_mantissa = ((mantissa << shift) << 23) & 0x7fe000;\n" ++ " uintX subnormal_exp = -(shift << 23) + 0x38800000;\n" ++ " uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n" ++ " uintX subnormal_or_zero = mantissa ? 
subnormal_val : 0;\n" ++ "\n" ++ " uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n" ++ "\n" ++ " uintX low_3 = (x << 13) & 0x7fe000;\n" ++ " uintX normalized_val = exponent + low_3;\n" ++ " uintX inf_nan_val = low_3 + 0x7f800000;\n" ++ "\n" ++ " uintX exp_mask = 0x7c00;\n" ++ " uintX is_inf_nan = (x & exp_mask) == exp_mask;\n" ++ " uintX is_normalized = x & exp_mask;\n" ++ "\n" ++ " uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n" ++ " uintX exp_mantissa = (is_normalized ? check : subnormal_or_zero) & 0x7fffe000;\n" ++ " uintX sign_bit = (x << 16) & 0x80000000;\n" ++ "\n" ++ " return asfloat(exp_mantissa + sign_bit);\n" ++ "}\n"; ++ ++ ++ if (node->type != HLSL_IR_EXPR) ++ return false; ++ ++ expr = hlsl_ir_expr(node); ++ ++ if (expr->op != HLSL_OP1_F16TOF32) ++ return false; ++ ++ rhs = expr->operands[0].node; ++ component_count = hlsl_type_component_count(rhs->data_type); ++ ++ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) ++ return false; ++ ++ if (!(func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body))) ++ return false; ++ ++ lhs = func->parameters.vars[0]; ++ ++ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) ++ return false; ++ hlsl_block_add_instr(block, store); ++ ++ if (!(call = hlsl_new_call(ctx, func, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, call); ++ ++ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) ++ return false; ++ hlsl_block_add_instr(block, &load->node); ++ ++ return true; ++} ++ + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) + { +@@ -5466,6 +6354,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + if (ctx->result) + return ctx->result; + ++ if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) ++ lower_ir(ctx, lower_f16tof32, body); ++ + lower_return(ctx, entry_func, body, false); + + while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); +@@ -5532,6 +6423,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } + ++ transform_unroll_loops(ctx, body); + hlsl_run_const_passes(ctx, body); + + remove_unreachable_code(ctx, body); +@@ -5541,7 +6433,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_casts_to_bool, body); + lower_ir(ctx, lower_int_dot, body); + +- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); ++ hlsl_transform_ir(ctx, validate_dereferences, body, NULL); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); + if (profile->major_version >= 4) + hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); +@@ -5555,6 +6447,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + + if (profile->major_version < 4) + { ++ while (lower_ir(ctx, lower_nonconstant_array_loads, body)); ++ + lower_ir(ctx, lower_ternary, body); + + lower_ir(ctx, lower_nonfloat_exprs, body); +@@ -5569,6 +6463,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + lower_ir(ctx, lower_round, body); + lower_ir(ctx, lower_ceil, body); + lower_ir(ctx, lower_floor, body); ++ lower_ir(ctx, lower_trig, body); + lower_ir(ctx, lower_comparison_operators, body); + lower_ir(ctx, lower_logic_not, body); + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) +@@ -5628,7 
+6523,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry + switch (target_type) + { + case VKD3D_SHADER_TARGET_D3D_BYTECODE: +- return hlsl_sm1_write(ctx, entry_func, out); ++ { ++ uint32_t config_flags = vkd3d_shader_init_config_flags(); ++ struct vkd3d_shader_code ctab = {0}; ++ struct vsir_program program; ++ int result; ++ ++ sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); ++ if (ctx->result) ++ { ++ vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&ctab); ++ return ctx->result; ++ } ++ ++ result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); ++ vsir_program_cleanup(&program); ++ vkd3d_shader_free_shader_code(&ctab); ++ return result; ++ } + + case VKD3D_SHADER_TARGET_DXBC_TPF: + return hlsl_sm4_write(ctx, entry_func, out); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +index 16015fa8a81..db4913b7c62 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +@@ -28,7 +28,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -119,7 +119,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -234,7 +234,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -260,7 +260,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -286,7 +286,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -313,7 +313,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + unsigned int k; + float i; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -339,7 +339,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -384,7 +384,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum 
hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -418,7 +418,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -442,7 +442,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -487,7 +487,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -527,7 +527,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -553,7 +553,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -598,8 +598,8 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -635,8 +635,8 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -662,8 +662,8 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -689,8 +689,8 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- 
assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -715,9 +715,9 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); +- assert(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); + + dst->u[0].f = 0.0f; + for (k = 0; k < src1->node.data_type->dimx; ++k) +@@ -743,11 +743,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); +- assert(type == src3->node.data_type->e.numeric.type); +- assert(src1->node.data_type->dimx == src2->node.data_type->dimx); +- assert(src3->node.data_type->dimx == 1); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); ++ VKD3D_ASSERT(src3->node.data_type->dimx == 1); + + dst->u[0].f = src3->value.u[0].f; + for (k = 0; k < src1->node.data_type->dimx; ++k) +@@ -774,8 +774,8 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -841,8 +841,8 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co + { + unsigned int k; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -877,8 +877,8 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -916,8 +916,8 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con + { + unsigned int k; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == 
src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -955,8 +955,8 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); +- assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); ++ VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -986,8 +986,8 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1024,8 +1024,8 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1063,8 +1063,8 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1105,8 +1105,8 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + +- assert(type == src1->node.data_type->e.numeric.type); +- assert(type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1139,8 +1139,8 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); +- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1175,9 +1175,9 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + { + unsigned int k; + +- assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); +- assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); +- assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); + + for (k = 0; k < dst_type->dimx; ++k) + dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; +@@ -1190,8 +1190,8 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c + { + unsigned int k; + +- assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); +- assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); ++ VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); ++ VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + + for (k = 0; k < dst_type->dimx; ++k) + { +@@ -1239,7 +1239,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + { + if (expr->operands[i].node->type != HLSL_IR_CONSTANT) + return false; +- assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); + } + } + arg1 = hlsl_ir_constant(expr->operands[0].node); +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index b3b745fc1b2..c1b8582af6d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -19,9 +19,73 @@ + #include "vkd3d_shader_private.h" + #include "vkd3d_types.h" + +-bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) ++static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, ++ unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) ++{ ++ const struct vkd3d_shader_spirv_target_info *spirv_info; ++ struct vkd3d_shader_parameter1 *parameters; ++ ++ *ret_count = 0; ++ *ret_parameters = NULL; ++ ++ if (!(spirv_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO)) || !spirv_info->parameter_count) ++ return VKD3D_OK; ++ ++ if (!(parameters = vkd3d_calloc(spirv_info->parameter_count, sizeof(*parameters)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ for (unsigned int i = 0; i < spirv_info->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter *src = &spirv_info->parameters[i]; ++ struct vkd3d_shader_parameter1 *dst = ¶meters[i]; ++ ++ dst->name = src->name; ++ dst->type = src->type; ++ dst->data_type = src->data_type; ++ ++ if (src->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ dst->u.immediate_constant = src->u.immediate_constant; ++ } ++ else if (src->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) ++ { ++ dst->u.specialization_constant = src->u.specialization_constant; ++ } ++ else ++ { ++ ERR("Invalid parameter type %#x.\n", src->type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ } ++ ++ *ret_count = spirv_info->parameter_count; ++ *ret_parameters = parameters; ++ ++ return VKD3D_OK; ++} ++ ++bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, ++ const struct vkd3d_shader_version *version, unsigned int reserve) + { + memset(program, 0, sizeof(*program)); ++ ++ if (compile_info) ++ { ++ const struct vkd3d_shader_parameter_info *parameter_info; ++ ++ if ((parameter_info = vkd3d_find_struct(compile_info->next, PARAMETER_INFO))) ++ { ++ program->parameter_count = parameter_info->parameter_count; ++ program->parameters = parameter_info->parameters; ++ } ++ else ++ { ++ if (convert_parameter_info(compile_info, &program->parameter_count, &program->parameters) < 0) ++ return false; ++ program->free_parameters = true; ++ } ++ } ++ + program->shader_version = *version; + return shader_instruction_array_init(&program->instructions, reserve); + } +@@ -30,6 +94,8 @@ 
void vsir_program_cleanup(struct vsir_program *program) + { + size_t i; + ++ if (program->free_parameters) ++ vkd3d_free((void *)program->parameters); + for (i = 0; i < program->block_name_count; ++i) + vkd3d_free((void *)program->block_names[i]); + vkd3d_free(program->block_names); +@@ -39,6 +105,18 @@ void vsir_program_cleanup(struct vsir_program *program) + shader_signature_cleanup(&program->patch_constant_signature); + } + ++const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( ++ const struct vsir_program *program, enum vkd3d_shader_parameter_name name) ++{ ++ for (unsigned int i = 0; i < program->parameter_count; ++i) ++ { ++ if (program->parameters[i].name == name) ++ return &program->parameters[i]; ++ } ++ ++ return NULL; ++} ++ + static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) + { + return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; +@@ -46,9 +124,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade + + static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) +- || handler_idx == VKD3DSIH_HS_DECLS; ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) ++ || opcode == VKD3DSIH_HS_DECLS; + } + + static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) +@@ -60,9 +138,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i + + static bool vsir_instruction_init_with_params(struct vsir_program *program, + struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, +- enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) ++ enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) + { +- vsir_instruction_init(ins, location, handler_idx); ++ vsir_instruction_init(ins, location, opcode); + ins->dst_count = dst_count; + ins->src_count = src_count; + +@@ -287,7 +365,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + mul_ins = &instructions->elements[pos]; + add_ins = &instructions->elements[pos + 1]; + +- mul_ins->handler_idx = VKD3DSIH_MUL; ++ mul_ins->opcode = VKD3DSIH_MUL; + mul_ins->src_count = 2; + + if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) +@@ -311,6 +389,58 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro + return VKD3D_OK; + } + ++static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *program, ++ struct vkd3d_shader_instruction *sincos) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = sincos - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int s; ++ ++ if (sincos->dst_count != 1) ++ return VKD3D_OK; ++ ++ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &instructions->elements[pos + 1]; ++ ++ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SINCOS, 2, 1))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins->flags = sincos->flags; ++ ++ *ins->src = *sincos->src; ++ /* Set the source swizzle to replicate the first component. 
*/ ++ s = vsir_swizzle_get_component(sincos->src->swizzle, 0); ++ ins->src->swizzle = vkd3d_shader_create_swizzle(s, s, s, s); ++ ++ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_1) ++ { ++ ins->dst[0] = *sincos->dst; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_1; ++ } ++ else ++ { ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); ++ } ++ ++ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_0) ++ { ++ ins->dst[1] = *sincos->dst; ++ ins->dst[1].write_mask = VKD3DSP_WRITEMASK_0; ++ } ++ else ++ { ++ vsir_dst_param_init(&ins->dst[1], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); ++ } ++ ++ /* Make the original instruction no-op */ ++ vkd3d_shader_instruction_make_nop(sincos); ++ ++ return VKD3D_OK; ++} ++ + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) + { +@@ -322,7 +452,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + { + struct vkd3d_shader_instruction *ins = &instructions->elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_IFC: + if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) +@@ -339,11 +469,18 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr + return ret; + break; + ++ case VKD3DSIH_DCL: + case VKD3DSIH_DCL_CONSTANT_BUFFER: ++ case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_TEMPS: + vkd3d_shader_instruction_make_nop(ins); + break; + ++ case VKD3DSIH_SINCOS: ++ if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) ++ return ret; ++ break; ++ + default: + break; + } +@@ -492,26 +629,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal + struct shader_phase_location *loc; + bool b; + +- if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) ++ if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) + { + b = flattener_is_in_fork_or_join_phase(normaliser); + /* Reset the phase info. */ + normaliser->phase_body_idx = ~0u; +- normaliser->phase = ins->handler_idx; ++ normaliser->phase = ins->opcode; + normaliser->instance_count = 1; + /* Leave the first occurrence and delete the rest. 
*/ + if (b) + vkd3d_shader_instruction_make_nop(ins); + return; + } +- else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT +- || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) ++ else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT ++ || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + { + normaliser->instance_count = ins->declaration.count + !ins->declaration.count; + vkd3d_shader_instruction_make_nop(ins); + return; + } +- else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( ++ else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + &ins->declaration.dst.reg)) + { + vkd3d_shader_instruction_make_nop(ins); +@@ -524,7 +661,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal + if (normaliser->phase_body_idx == ~0u) + normaliser->phase_body_idx = index; + +- if (ins->handler_idx == VKD3DSIH_RET) ++ if (ins->opcode == VKD3DSIH_RET) + { + normaliser->last_ret_location = ins->location; + vkd3d_shader_instruction_make_nop(ins); +@@ -666,6 +803,12 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne + dst->write_mask = VKD3DSP_WRITEMASK_0; + } + ++static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) ++{ ++ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ src->reg.idx[0].offset = idx; ++} ++ + static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) + { + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); +@@ -678,12 +821,18 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 + src->reg.u.immconst_u32[0] = value; + } + ++static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) ++{ ++ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); ++ src->reg.idx[0].offset = idx; ++} ++ + void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, +- enum vkd3d_shader_opcode handler_idx) ++ enum vkd3d_shader_opcode opcode) + { + memset(ins, 0, sizeof(*ins)); + ins->location = *location; +- ins->handler_idx = handler_idx; ++ ins->opcode = opcode; + } + + static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, +@@ -770,7 +919,7 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param + if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) + { + /* The TPF reader validates idx_count. */ +- assert(reg->idx_count == 1); ++ VKD3D_ASSERT(reg->idx_count == 1); + reg->idx[1] = reg->idx[0]; + /* The control point id param is implicit here. Avoid later complications by inserting it. 
*/ + reg->idx[0].offset = 0; +@@ -865,12 +1014,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + { + ins = &instructions->elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- normaliser.phase = ins->handler_idx; ++ normaliser.phase = ins->opcode; + break; + default: + if (vsir_instruction_is_dcl(ins)) +@@ -888,7 +1037,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i + { + ins = &instructions->elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + input_control_point_count = ins->declaration.count; +@@ -992,16 +1141,16 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u + { + unsigned int i, j, r, c, component_idx, component_count; + +- assert(write_mask <= VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(write_mask <= VKD3DSP_WRITEMASK_ALL); + component_idx = vsir_write_mask_get_component_idx(write_mask); + component_count = vsir_write_mask_component_count(write_mask); + +- assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); ++ VKD3D_ASSERT(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); + + if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) + { + /* Validated in the TPF reader. */ +- assert(range_map[register_idx][component_idx] != UINT8_MAX); ++ VKD3D_ASSERT(range_map[register_idx][component_idx] != UINT8_MAX); + return; + } + if (range_map[register_idx][component_idx] == register_count) +@@ -1021,7 +1170,7 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u + /* A synthetic patch constant range which overlaps an existing range can start upstream of it + * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. + * The latter is validated in the TPF reader. */ +- assert(!range_map[r][c] || !is_dcl_indexrange); ++ VKD3D_ASSERT(!range_map[r][c] || !is_dcl_indexrange); + range_map[r][c] = UINT8_MAX; + } + } +@@ -1224,7 +1373,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map + + TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, + e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); +- assert(!(e->mask & f->mask)); ++ VKD3D_ASSERT(!(e->mask & f->mask)); + + e->mask |= f->mask; + e->used_mask |= f->used_mask; +@@ -1258,7 +1407,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map + continue; + + register_count = range_map_get_register_count(range_map, e->register_index, e->mask); +- assert(register_count != UINT8_MAX); ++ VKD3D_ASSERT(register_count != UINT8_MAX); + register_count += !register_count; + + if (register_count > 1) +@@ -1281,7 +1430,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map + static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, + unsigned int id_idx, unsigned int register_index) + { +- assert(id_idx < ARRAY_SIZE(reg->idx) - 1); ++ VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); + + /* For a relative-addressed register index, move the id up a slot to separate it from the address, + * because rel_addr can be replaced with a constant offset in some cases. 
*/ +@@ -1388,7 +1537,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + if (is_io_dcl) + { + /* Validated in the TPF reader. */ +- assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); ++ VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); + + if (dcl_params[element_idx]) + { +@@ -1413,7 +1562,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par + else + { + /* The control point id param. */ +- assert(reg->idx[0].rel_addr); ++ VKD3D_ASSERT(reg->idx[0].rel_addr); + } + id_idx = 1; + } +@@ -1526,7 +1675,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + struct vkd3d_shader_register *reg; + unsigned int i; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_INPUT: + if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) +@@ -1560,7 +1709,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- normaliser->phase = ins->handler_idx; ++ normaliser->phase = ins->opcode; + memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); + memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); + memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); +@@ -1576,7 +1725,33 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi + } + } + +-static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) ++static bool use_flat_interpolation(const struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ static const struct vkd3d_shader_location no_loc; ++ const struct vkd3d_shader_parameter1 *parameter; ++ ++ if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) ++ return false; ++ ++ if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported flat interpolation parameter type %#x.\n", parameter->type); ++ return false; ++ } ++ if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); ++ return false; ++ } ++ ++ return parameter->u.immediate_constant.u.u32; ++} ++ ++static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) + { + struct io_normaliser normaliser = {program->instructions}; + struct vkd3d_shader_instruction *ins; +@@ -1594,7 +1769,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + { + ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: + normaliser.output_control_point_count = ins->declaration.count; +@@ -1608,7 +1783,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + /* fall through */ + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- normaliser.phase = ins->handler_idx; ++ normaliser.phase = ins->opcode; + break; + default: + break; +@@ -1626,7 +1801,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct 
vsir_program + normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; + else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) + normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; +- else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); ++ else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); + } + } + } +@@ -1639,6 +1814,18 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program + return VKD3D_ERROR_OUT_OF_MEMORY; + } + ++ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL ++ && program->shader_version.major < 4 && use_flat_interpolation(program, message_context)) ++ { ++ for (i = 0; i < program->input_signature.element_count; ++i) ++ { ++ struct signature_element *element = &program->input_signature.elements[i]; ++ ++ if (!ascii_strcasecmp(element->semantic_name, "COLOR")) ++ element->interpolation_mode = VKD3DSIM_CONSTANT; ++ } ++ } ++ + normaliser.phase = VKD3DSIH_INVALID; + for (i = 0; i < normaliser.instructions.count; ++i) + shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); +@@ -1740,7 +1927,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) ++ if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) + { + struct flat_constant_def *def; + +@@ -1779,7 +1966,7 @@ static void remove_dead_code(struct vsir_program *program) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_IF: + case VKD3DSIH_LOOP: +@@ -1799,7 +1986,7 @@ static void remove_dead_code(struct vsir_program *program) + { + if (depth > 0) + { +- if (ins->handler_idx != VKD3DSIH_ELSE) ++ if (ins->opcode != VKD3DSIH_ELSE) + --depth; + vkd3d_shader_instruction_make_nop(ins); + } +@@ -1870,14 +2057,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_src_param *srcs; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_TEX: + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(srcs, 0, sizeof(*srcs) * 3); + +- ins->handler_idx = VKD3DSIH_SAMPLE; ++ ins->opcode = VKD3DSIH_SAMPLE; + + srcs[0] = ins->src[0]; + +@@ -1899,13 +2086,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr + ins->src_count = 3; + break; + ++ case VKD3DSIH_TEXLDD: ++ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ memset(srcs, 0, sizeof(*srcs) * 5); ++ ++ ins->opcode = VKD3DSIH_SAMPLE_GRAD; ++ ++ srcs[0] = ins->src[0]; ++ ++ srcs[1].reg.type = VKD3DSPR_RESOURCE; ++ srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; ++ srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; ++ srcs[1].reg.idx_count = 2; ++ srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; ++ srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; ++ srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ srcs[2].reg.type = VKD3DSPR_SAMPLER; ++ srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; ++ srcs[2].reg.idx[1] = 
ins->src[1].reg.idx[0]; ++ srcs[2].reg.idx_count = 2; ++ srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; ++ ++ srcs[3] = ins->src[2]; ++ srcs[4] = ins->src[3]; ++ ++ ins->src = srcs; ++ ins->src_count = 5; ++ break; ++ + case VKD3DSIH_TEXBEM: + case VKD3DSIH_TEXBEML: + case VKD3DSIH_TEXCOORD: + case VKD3DSIH_TEXDEPTH: + case VKD3DSIH_TEXDP3: + case VKD3DSIH_TEXDP3TEX: +- case VKD3DSIH_TEXLDD: + case VKD3DSIH_TEXLDL: + case VKD3DSIH_TEXM3x2PAD: + case VKD3DSIH_TEXM3x2TEX: +@@ -1919,7 +2135,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr + case VKD3DSIH_TEXREG2RGB: + vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " +- "Combined sampler instruction %#x.", ins->handler_idx); ++ "Combined sampler instruction %#x.", ins->opcode); + return VKD3D_ERROR_NOT_IMPLEMENTED; + + default: +@@ -2030,7 +2246,7 @@ static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, + { + struct vkd3d_shader_instruction *dst_ins; + +- if (instruction->handler_idx == VKD3DSIH_NOP) ++ if (instruction->opcode == VKD3DSIH_NOP) + return true; + + if (!(dst_ins = cf_flattener_require_space(flattener, 1))) +@@ -2245,9 +2461,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + * phase instruction, and in all other shader types begins with the first label instruction. + * Declaring an indexable temp with function scope is not considered a declaration, + * because it needs to live inside a function. */ +- if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) ++ if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) + { +- bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP ++ bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP + && instruction->declaration.indexable_temp.has_function_scope; + + if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) +@@ -2260,14 +2476,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + cf_info = flattener->control_flow_depth + ? 
&flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: + if (!cf_flattener_copy_instruction(flattener, instruction)) + return VKD3D_ERROR_OUT_OF_MEMORY; +- if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) ++ if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) + after_declarations_section = false; + break; + +@@ -2601,7 +2817,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi + + static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *param) + { +- assert(param->reg.type == VKD3DSPR_LABEL); ++ VKD3D_ASSERT(param->reg.type == VKD3DSPR_LABEL); + return param->reg.idx[0].offset; + } + +@@ -2662,7 +2878,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + unsigned int case_count, j, default_label; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: + current_label = label_from_src_param(&ins->src[0]); +@@ -2858,7 +3074,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ + + /* Only phi src/dst SSA values need be converted here. Structurisation may + * introduce new cases of undominated SSA use, which will be handled later. */ +- if (ins->handler_idx != VKD3DSIH_PHI) ++ if (ins->opcode != VKD3DSIH_PHI) + continue; + ++phi_count; + +@@ -2870,7 +3086,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ + unsigned int label; + + label = label_from_src_param(&ins->src[j + 1]); +- assert(label); ++ VKD3D_ASSERT(label); + + info = &block_info[label - 1]; + +@@ -2907,7 +3123,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ + for (j = 0; j < ins->src_count; ++j) + materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: + current_label = label_from_src_param(&ins->src[0]); +@@ -3027,7 +3243,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int + + byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); + +- assert(label); ++ VKD3D_ASSERT(label); + memset(block, 0, sizeof(*block)); + block->label = label; + vsir_block_list_init(&block->predecessors); +@@ -3311,7 +3527,7 @@ static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_blo + struct vsir_block *successor = &cfg->blocks[target - 1]; + enum vkd3d_result ret; + +- assert(successor->label != 0); ++ VKD3D_ASSERT(successor->label != 0); + + if ((ret = vsir_block_list_add(&block->successors, successor)) < 0) + return ret; +@@ -3336,7 +3552,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) + if (block->label == 0) + continue; + +- switch (block->end->handler_idx) ++ switch (block->end->opcode) + { + case VKD3DSIH_RET: + shape = "trapezium"; +@@ -3478,7 +3694,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; + bool finish = false; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_PHI: + case VKD3DSIH_SWITCH_MONOLITHIC: +@@ -3488,11 +3704,11 @@ static enum vkd3d_result 
vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + { + unsigned int label = label_from_src_param(&instruction->src[0]); + +- assert(!current_block); +- assert(label > 0); +- assert(label <= cfg->block_count); ++ VKD3D_ASSERT(!current_block); ++ VKD3D_ASSERT(label > 0); ++ VKD3D_ASSERT(label <= cfg->block_count); + current_block = &cfg->blocks[label - 1]; +- assert(current_block->label == 0); ++ VKD3D_ASSERT(current_block->label == 0); + if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) + goto fail; + current_block->begin = &program->instructions.elements[i + 1]; +@@ -3503,7 +3719,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + + case VKD3DSIH_BRANCH: + case VKD3DSIH_RET: +- assert(current_block); ++ VKD3D_ASSERT(current_block); + current_block->end = instruction; + current_block = NULL; + break; +@@ -3511,7 +3727,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- assert(!current_block); ++ VKD3D_ASSERT(!current_block); + finish = true; + break; + +@@ -3533,7 +3749,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program + if (block->label == 0) + continue; + +- switch (block->end->handler_idx) ++ switch (block->end->opcode) + { + case VKD3DSIH_RET: + break; +@@ -3581,7 +3797,7 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru + { + size_t i; + +- assert(current->label != 0); ++ VKD3D_ASSERT(current->label != 0); + + if (current == reference) + return; +@@ -3796,7 +4012,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + /* Do not count back edges. */ + if (cfg->loops_by_header[i] != SIZE_MAX) + { +- assert(in_degrees[i] > 0); ++ VKD3D_ASSERT(in_degrees[i] > 0); + in_degrees[i] -= 1; + } + +@@ -3882,7 +4098,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + + inner_stack_item->seen_count += new_seen_count; + +- assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); ++ VKD3D_ASSERT(inner_stack_item->seen_count <= inner_stack_item->loop->count); + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + +@@ -3902,7 +4118,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + if (vsir_block_dominates(successor, block)) + continue; + +- assert(in_degrees[successor->label - 1] > 0); ++ VKD3D_ASSERT(in_degrees[successor->label - 1] > 0); + --in_degrees[successor->label - 1]; + + if (in_degrees[successor->label - 1] == 0) +@@ -3923,7 +4139,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + goto fail; + } + +- assert(sorter.stack_count == 0); ++ VKD3D_ASSERT(sorter.stack_count == 0); + + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); +@@ -3993,7 +4209,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ + if (vsir_block_dominates(successor, block)) + continue; + +- assert(block->order_pos < successor->order_pos); ++ VKD3D_ASSERT(block->order_pos < successor->order_pos); + + /* Jumping from a block to the following one is always + * possible, so nothing to do. 
*/ +@@ -4066,7 +4282,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ + { + if (interval->synthetic) + interval->begin = min(begin, interval->begin); +- assert(begin >= interval->begin); ++ VKD3D_ASSERT(begin >= interval->begin); + } + } + +@@ -4119,7 +4335,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block + break; + } + +- assert(action->target != UINT_MAX); ++ VKD3D_ASSERT(action->target != UINT_MAX); + action->jump_type = JUMP_CONTINUE; + } + else +@@ -4141,7 +4357,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block + + if (action->target == UINT_MAX) + { +- assert(successor->order_pos == block->order_pos + 1); ++ VKD3D_ASSERT(successor->order_pos == block->order_pos + 1); + action->jump_type = JUMP_NONE; + } + else +@@ -4168,7 +4384,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + struct vsir_block *block = cfg->order.blocks[i]; + struct vsir_cfg_structure *structure; + +- assert(stack_depth > 0); ++ VKD3D_ASSERT(stack_depth > 0); + + /* Open loop intervals. */ + while (open_interval_idx < cfg->loop_interval_count) +@@ -4192,7 +4408,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + structure->u.block = block; + + /* Generate between zero and two jump instructions. */ +- switch (block->end->handler_idx) ++ switch (block->end->opcode) + { + case VKD3DSIH_BRANCH: + { +@@ -4227,7 +4443,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + * selection ladders. */ + if (action_true.successor == action_false.successor) + { +- assert(action_true.jump_type == action_false.jump_type); ++ VKD3D_ASSERT(action_true.jump_type == action_false.jump_type); + } + else + { +@@ -4243,7 +4459,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; + struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; + +- assert(inner_loop->type == STRUCTURE_TYPE_LOOP); ++ VKD3D_ASSERT(inner_loop->type == STRUCTURE_TYPE_LOOP); + + /* Otherwise, if one of the branches is + * continueing the inner loop we're inside, +@@ -4260,7 +4476,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + action_false = tmp; + } + +- assert(action_true.jump_type != JUMP_NONE); ++ VKD3D_ASSERT(action_true.jump_type != JUMP_NONE); + + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; +@@ -4300,8 +4516,8 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + } + } + +- assert(stack_depth == 0); +- assert(open_interval_idx == cfg->loop_interval_count); ++ VKD3D_ASSERT(stack_depth == 0); ++ VKD3D_ASSERT(open_interval_idx == cfg->loop_interval_count); + + if (TRACE_ON()) + vsir_cfg_dump_structured_program(cfg); +@@ -4325,7 +4541,7 @@ static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, + && !last->u.jump.condition && last->u.jump.target == target) + { + --list->count; +- assert(cfg->loop_intervals[target].target_count > 0); ++ VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); + --cfg->loop_intervals[target].target_count; + } + } +@@ -4366,7 +4582,7 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg + size_t pos = list->count - 1; + + selection = &list->structures[pos]; +- assert(selection->type == 
STRUCTURE_TYPE_SELECTION); ++ VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); + + if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); + else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); +@@ -4387,19 +4603,19 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg + /* Pointer `selection' could have been invalidated by the append + * operation. */ + selection = &list->structures[pos]; +- assert(selection->type == STRUCTURE_TYPE_SELECTION); ++ VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); + + if (if_target == max_target) + { + --selection->u.selection.if_body.count; +- assert(cfg->loop_intervals[if_target].target_count > 0); ++ VKD3D_ASSERT(cfg->loop_intervals[if_target].target_count > 0); + --cfg->loop_intervals[if_target].target_count; + } + + if (else_target == max_target) + { + --selection->u.selection.else_body.count; +- assert(cfg->loop_intervals[else_target].target_count > 0); ++ VKD3D_ASSERT(cfg->loop_intervals[else_target].target_count > 0); + --cfg->loop_intervals[else_target].target_count; + } + +@@ -4507,7 +4723,7 @@ static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, + } + + target = trailing_break->u.jump.target; +- assert(cfg->loop_intervals[target].target_count > 0); ++ VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); + + /* If the loop is not targeted by any jump, we can remove it. The + * trailing `break' then targets another loop, so we have to keep +@@ -4674,7 +4890,7 @@ static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_stru + break; + for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) + { +- assert(l->type == STRUCTURE_TYPE_LOOP); ++ VKD3D_ASSERT(l->type == STRUCTURE_TYPE_LOOP); + l->u.loop.needs_trampoline = true; + } + break; +@@ -4714,7 +4930,7 @@ static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_struct + case STRUCTURE_TYPE_JUMP: + if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) + break; +- assert(loop && loop->type == STRUCTURE_TYPE_LOOP); ++ VKD3D_ASSERT(loop && loop->type == STRUCTURE_TYPE_LOOP); + if (loop->u.loop.needs_trampoline) + structure->u.jump.needs_launcher = true; + break; +@@ -4912,7 +5128,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, + break; + + case JUMP_RET: +- assert(!jump->condition); ++ VKD3D_ASSERT(!jump->condition); + opcode = VKD3DSIH_RET; + break; + +@@ -5049,22 +5265,22 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: +- assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); ++ VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + TRACE("Structurizing a non-hull shader.\n"); + if ((ret = vsir_program_structurize_function(program, message_context, + &target, &i)) < 0) + goto fail; +- assert(i == program->instructions.count); ++ VKD3D_ASSERT(i == program->instructions.count); + break; + + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); +- TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); ++ VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); ++ TRACE("Structurizing phase %u of a 
hull shader.\n", ins->opcode); + target.instructions[target.ins_count++] = *ins; + ++i; + if ((ret = vsir_program_structurize_function(program, message_context, +@@ -5222,22 +5438,22 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + +- switch (ins->handler_idx) ++ switch (ins->opcode) + { + case VKD3DSIH_LABEL: +- assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); ++ VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + TRACE("Materializing undominated SSAs in a non-hull shader.\n"); + if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( + program, message_context, &i)) < 0) + return ret; +- assert(i == program->instructions.count); ++ VKD3D_ASSERT(i == program->instructions.count); + break; + + case VKD3DSIH_HS_CONTROL_POINT_PHASE: + case VKD3DSIH_HS_FORK_PHASE: + case VKD3DSIH_HS_JOIN_PHASE: +- assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); +- TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); ++ VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); ++ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); + ++i; + if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( + program, message_context, &i)) < 0) +@@ -5253,6 +5469,192 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + return VKD3D_OK; + } + ++static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) ++{ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ { ++ if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET ++ && !signature->elements[i].register_index) ++ { ++ *index = i; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, ++ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, ++ const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) ++{ ++ struct vkd3d_shader_instruction_array *instructions = &program->instructions; ++ size_t pos = ret - instructions->elements; ++ struct vkd3d_shader_instruction *ins; ++ ++ static const struct ++ { ++ enum vkd3d_shader_opcode float_opcode; ++ enum vkd3d_shader_opcode uint_opcode; ++ bool swap; ++ } ++ opcodes[] = ++ { ++ [VKD3D_SHADER_COMPARISON_FUNC_EQUAL] = {VKD3DSIH_EQO, VKD3DSIH_IEQ}, ++ [VKD3D_SHADER_COMPARISON_FUNC_NOT_EQUAL] = {VKD3DSIH_NEO, VKD3DSIH_INE}, ++ [VKD3D_SHADER_COMPARISON_FUNC_GREATER_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE}, ++ [VKD3D_SHADER_COMPARISON_FUNC_LESS] = {VKD3DSIH_LTO, VKD3DSIH_ULT}, ++ [VKD3D_SHADER_COMPARISON_FUNC_LESS_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE, true}, ++ [VKD3D_SHADER_COMPARISON_FUNC_GREATER] = {VKD3DSIH_LTO, VKD3DSIH_ULT, true}, ++ }; ++ ++ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) ++ { ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[pos]; ++ ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; ++ src_param_init_const_uint(&ins->src[0], 0); ++ ++ *ret_pos = pos + 1; ++ return VKD3D_OK; ++ } ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) ++ 
return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[pos]; ++ ++ switch (ref->data_type) ++ { ++ case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: ++ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); ++ src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); ++ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], ++ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); ++ break; ++ ++ case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: ++ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); ++ src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); ++ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], ++ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); ++ break; ++ ++ default: ++ FIXME("Unhandled parameter data type %#x.\n", ref->data_type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ ++ dst_param_init_ssa_bool(&ins->dst[0], program->ssa_count); ++ ins->src[opcodes[compare_func].swap ? 1 : 0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ ++ ++ins; ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); ++ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; ++ src_param_init_ssa_bool(&ins->src[0], program->ssa_count); ++ ++ ++program->ssa_count; ++ ++ ++ins; ++ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = colour_signature_idx; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].write_mask = program->output_signature.elements[colour_signature_idx].mask; ++ src_param_init_temp_float(&ins->src[0], colour_temp); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; ++ ++ *ret_pos = pos + 3; ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, ++ struct vkd3d_shader_message_context *message_context) ++{ ++ const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; ++ static const struct vkd3d_shader_location no_loc; ++ enum vkd3d_shader_comparison_func compare_func; ++ uint32_t colour_signature_idx, colour_temp; ++ struct vkd3d_shader_instruction *ins; ++ size_t new_pos; ++ int ret; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) ++ return VKD3D_OK; ++ ++ if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) ++ || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) ++ return VKD3D_OK; ++ ++ if (!(func = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC)) ++ || !(ref = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF))) ++ return VKD3D_OK; ++ ++ if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, ++ "Unsupported alpha test function parameter type %#x.\n", func->type); ++ return VKD3D_ERROR_NOT_IMPLEMENTED; ++ } ++ if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) ++ { ++ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, ++ "Invalid alpha test function parameter 
data type %#x.\n", func->data_type); ++ return VKD3D_ERROR_INVALID_ARGUMENT; ++ } ++ compare_func = func->u.immediate_constant.u.u32; ++ ++ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_ALWAYS) ++ return VKD3D_OK; ++ ++ /* We're going to be reading from the output, so we need to go ++ * through the whole shader and convert it to a temp. */ ++ ++ if (compare_func != VKD3D_SHADER_COMPARISON_FUNC_NEVER) ++ colour_temp = program->temp_count++; ++ ++ for (size_t i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (vsir_instruction_is_dcl(ins)) ++ continue; ++ ++ if (ins->opcode == VKD3DSIH_RET) ++ { ++ if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, ++ ref, colour_signature_idx, colour_temp, &new_pos)) < 0) ++ return ret; ++ i = new_pos; ++ continue; ++ } ++ ++ /* No need to convert it if the comparison func is NEVER; we don't ++ * read from the output in that case. */ ++ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) ++ continue; ++ ++ for (size_t j = 0; j < ins->dst_count; ++j) ++ { ++ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; ++ ++ /* Note we run after I/O normalization. */ ++ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) ++ { ++ dst->reg.type = VKD3DSPR_TEMP; ++ dst->reg.idx[0].offset = colour_temp; ++ } ++ } ++ } ++ ++ return VKD3D_OK; ++} ++ + struct validation_context + { + struct vkd3d_shader_message_context *message_context; +@@ -5641,7 +6043,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, + if (instruction->dst_count != count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, + "Invalid destination count %u for an instruction of type %#x, expected %u.", +- instruction->dst_count, instruction->handler_idx, count); ++ instruction->dst_count, instruction->opcode, count); + } + + static void vsir_validate_src_count(struct validation_context *ctx, +@@ -5650,7 +6052,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, + if (instruction->src_count != count) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected %u.", +- instruction->src_count, instruction->handler_idx, count); ++ instruction->src_count, instruction->opcode, count); + } + + static bool vsir_validate_src_min_count(struct validation_context *ctx, +@@ -5660,7 +6062,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected at least %u.", +- instruction->src_count, instruction->handler_idx, count); ++ instruction->src_count, instruction->opcode, count); + return false; + } + +@@ -5674,7 +6076,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, + "Invalid source count %u for an instruction of type %#x, expected at most %u.", +- instruction->src_count, instruction->handler_idx, count); ++ instruction->src_count, instruction->opcode, count); + return false; + } + +@@ -5697,11 +6099,11 @@ static const char *name_from_cf_type(enum cf_type type) + static void vsir_validate_cf_type(struct validation_context *ctx, + const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) + { +- assert(ctx->cf_type != CF_TYPE_UNKNOWN); +- assert(expected_type != CF_TYPE_UNKNOWN); ++ 
VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); ++ VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); + if (ctx->cf_type != expected_type) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", +- instruction->handler_idx, name_from_cf_type(ctx->cf_type)); ++ instruction->opcode, name_from_cf_type(ctx->cf_type)); + } + + static void vsir_validate_instruction(struct validation_context *ctx) +@@ -5718,13 +6120,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) + for (i = 0; i < instruction->src_count; ++i) + vsir_validate_src_param(ctx, &instruction->src[i]); + +- if (instruction->handler_idx >= VKD3DSIH_INVALID) ++ if (instruction->opcode >= VKD3DSIH_INVALID) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", +- instruction->handler_idx); ++ instruction->opcode); + } + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_HS_DECLS: + case VKD3DSIH_HS_CONTROL_POINT_PHASE: +@@ -5733,12 +6135,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_dst_count(ctx, instruction, 0); + vsir_validate_src_count(ctx, instruction, 0); + if (version->type != VKD3D_SHADER_TYPE_HULL) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", +- instruction->handler_idx); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, ++ "Phase instruction %#x is only valid in a hull shader.", ++ instruction->opcode); + if (ctx->depth != 0) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", +- instruction->handler_idx); +- ctx->phase = instruction->handler_idx; ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Phase instruction %#x must appear to top level.", ++ instruction->opcode); ++ ctx->phase = instruction->opcode; + ctx->dcl_temps_found = false; + return; + +@@ -5812,7 +6216,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + && ctx->phase == VKD3DSIH_INVALID) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Instruction %#x appear before any phase instruction in a hull shader.", +- instruction->handler_idx); ++ instruction->opcode); + + /* We support two different control flow types in shaders: + * block-based, like DXIL and SPIR-V, and structured, like D3DBC +@@ -5824,7 +6228,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + * block, but need for that hasn't arisen yet, so we don't. 
*/ + if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) + { +- if (instruction->handler_idx == VKD3DSIH_LABEL) ++ if (instruction->opcode == VKD3DSIH_LABEL) + ctx->cf_type = CF_TYPE_BLOCKS; + else + ctx->cf_type = CF_TYPE_STRUCTURED; +@@ -5832,7 +6236,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + + if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) + { +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_LABEL: + if (ctx->inside_block) +@@ -5844,20 +6248,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) + case VKD3DSIH_BRANCH: + case VKD3DSIH_SWITCH_MONOLITHIC: + if (!ctx->inside_block) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", +- instruction->handler_idx); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Invalid instruction %#x outside any block.", ++ instruction->opcode); + ctx->inside_block = false; + break; + + default: + if (!ctx->inside_block) +- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", +- instruction->handler_idx); ++ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, ++ "Invalid instruction %#x outside any block.", ++ instruction->opcode); + break; + } + } + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_TEMPS: + vsir_validate_dst_count(ctx, instruction, 0); +@@ -5877,7 +6283,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, 1); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_IFC: +@@ -5896,7 +6302,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); + else +- ctx->blocks[ctx->depth - 1] = instruction->handler_idx; ++ ctx->blocks[ctx->depth - 1] = instruction->opcode; + break; + + case VKD3DSIH_ENDIF: +@@ -5915,7 +6321,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 
2 : 0); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_ENDLOOP: +@@ -5934,7 +6340,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, 1); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_ENDREP: +@@ -5953,7 +6359,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) + vsir_validate_src_count(ctx, instruction, 1); + if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) + return; +- ctx->blocks[ctx->depth++] = instruction->handler_idx; ++ ctx->blocks[ctx->depth++] = instruction->opcode; + break; + + case VKD3DSIH_ENDSWITCH: +@@ -6225,7 +6631,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t + return result; + } + +- if ((result = vsir_program_normalise_io_registers(program)) < 0) ++ if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) + return result; + + if ((result = instruction_array_normalise_flat_constants(program)) < 0) +@@ -6241,6 +6647,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t + return result; + } + ++ if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) ++ return result; ++ + if (TRACE_ON()) + vkd3d_shader_trace(program); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h +index 4860cf5f90e..9806614a35b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.h ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h +@@ -141,7 +141,7 @@ void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location + + static inline struct preproc_file *preproc_get_top_file(struct preproc_ctx *ctx) + { +- assert(ctx->file_count); ++ VKD3D_ASSERT(ctx->file_count); + return &ctx->file_stack[ctx->file_count - 1]; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index be50d3b9020..7fc963192cf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -20,6 +20,7 @@ + + %{ + ++#include "preproc.h" + #include "preproc.tab.h" + + #undef ERROR /* defined in wingdi.h */ +@@ -408,7 +409,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + } + ctx->last_was_eof = false; + +- assert(ctx->file_count); ++ VKD3D_ASSERT(ctx->file_count); + if (!(token = preproc_lexer_lex(lval, lloc, scanner))) + { + ctx->last_was_eof = true; +@@ -646,7 +647,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) + { + struct preproc_text *current_arg = NULL; + +- assert(func_state->macro->arg_count); ++ VKD3D_ASSERT(func_state->macro->arg_count); + + if (func_state->arg_count < func_state->macro->arg_count) + current_arg = &func_state->macro->arg_values[func_state->arg_count]; +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y +index 009c35ffb97..366e351e3b5 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.y ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y +@@ -119,7 +119,7 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct 
vkd3d_shader_locati + macro->body.text = *body; + macro->body.location = *body_loc; + ret = rb_put(&ctx->macros, name, ¯o->entry); +- assert(!ret); ++ VKD3D_ASSERT(!ret); + return true; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 984a4f894f6..bc8a7a5b28c 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -313,7 +313,7 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, + struct vkd3d_spirv_chunk *chunk; + size_t src_location = 0; + +- assert(list_empty(&dst_stream->inserted_chunks)); ++ VKD3D_ASSERT(list_empty(&dst_stream->inserted_chunks)); + + LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) + src_word_count += chunk->word_count; +@@ -322,16 +322,16 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, + dst_stream->word_count + src_word_count, sizeof(*dst_stream->words))) + return false; + +- assert(dst_stream->word_count + src_word_count <= dst_stream->capacity); ++ VKD3D_ASSERT(dst_stream->word_count + src_word_count <= dst_stream->capacity); + LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) + { +- assert(src_location <= chunk->location); ++ VKD3D_ASSERT(src_location <= chunk->location); + word_count = chunk->location - src_location; + memcpy(&dst_stream->words[dst_stream->word_count], &src_stream->words[src_location], + word_count * sizeof(*src_stream->words)); + dst_stream->word_count += word_count; + src_location += word_count; +- assert(src_location == chunk->location); ++ VKD3D_ASSERT(src_location == chunk->location); + + memcpy(&dst_stream->words[dst_stream->word_count], chunk->words, + chunk->word_count * sizeof(*chunk->words)); +@@ -464,7 +464,7 @@ static void vkd3d_spirv_set_execution_model(struct vkd3d_spirv_builder *builder, + + static uint32_t vkd3d_spirv_opcode_word(SpvOp op, unsigned int word_count) + { +- assert(!(op & ~SpvOpCodeMask)); ++ VKD3D_ASSERT(!(op & ~SpvOpCodeMask)); + return (word_count << SpvWordCountShift) | op; + } + +@@ -538,7 +538,7 @@ static int vkd3d_spirv_declaration_compare(const void *key, const struct rb_entr + return ret; + if ((ret = vkd3d_u32_compare(a->parameter_count, b->parameter_count))) + return ret; +- assert(a->parameter_count <= ARRAY_SIZE(a->parameters)); ++ VKD3D_ASSERT(a->parameter_count <= ARRAY_SIZE(a->parameters)); + return memcmp(&a->parameters, &b->parameters, a->parameter_count * sizeof(*a->parameters)); + } + +@@ -554,7 +554,7 @@ static void vkd3d_spirv_insert_declaration(struct vkd3d_spirv_builder *builder, + { + struct vkd3d_spirv_declaration *d; + +- assert(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); ++ VKD3D_ASSERT(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); + + if (!(d = vkd3d_malloc(sizeof(*d)))) + return; +@@ -823,7 +823,7 @@ static uint32_t vkd3d_spirv_build_op_tr2v(struct vkd3d_spirv_builder *builder, + static void vkd3d_spirv_begin_function_stream_insertion(struct vkd3d_spirv_builder *builder, + size_t location) + { +- assert(builder->insertion_location == ~(size_t)0); ++ VKD3D_ASSERT(builder->insertion_location == ~(size_t)0); + + if (vkd3d_spirv_stream_current_location(&builder->function_stream) == location) + return; +@@ -1166,7 +1166,7 @@ static uint32_t vkd3d_spirv_get_op_constant(struct vkd3d_spirv_builder *builder, + static uint32_t vkd3d_spirv_build_op_constant64(struct vkd3d_spirv_builder *builder, + uint32_t 
result_type, const uint32_t *values, unsigned int value_count) + { +- assert(value_count == 2); ++ VKD3D_ASSERT(value_count == 2); + return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, + SpvOpConstant, result_type, values, value_count); + } +@@ -1583,13 +1583,13 @@ static uint32_t vkd3d_spirv_build_image_instruction(struct vkd3d_spirv_builder * + unsigned int index = 0, i; + uint32_t w[10]; + +- assert(operand_count <= ARRAY_SIZE(w)); ++ VKD3D_ASSERT(operand_count <= ARRAY_SIZE(w)); + for (i = 0; i < operand_count; ++i) + w[index++] = operands[i]; + + if (image_operands_mask) + { +- assert(index + 1 + image_operand_count <= ARRAY_SIZE(w)); ++ VKD3D_ASSERT(index + 1 + image_operand_count <= ARRAY_SIZE(w)); + w[index++] = image_operands_mask; + for (i = 0; i < image_operand_count; ++i) + w[index++] = image_operands[i]; +@@ -1606,9 +1606,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample(struct vkd3d_spirv_builder *bu + const uint32_t operands[] = {sampled_image_id, coordinate_id}; + + if (op == SpvOpImageSampleExplicitLod) +- assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); ++ VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); + else +- assert(op == SpvOpImageSampleImplicitLod); ++ VKD3D_ASSERT(op == SpvOpImageSampleImplicitLod); + + return vkd3d_spirv_build_image_instruction(builder, op, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +@@ -1621,9 +1621,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample_dref(struct vkd3d_spirv_builde + const uint32_t operands[] = {sampled_image_id, coordinate_id, dref_id}; + + if (op == SpvOpImageSampleDrefExplicitLod) +- assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); ++ VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); + else +- assert(op == SpvOpImageSampleDrefImplicitLod); ++ VKD3D_ASSERT(op == SpvOpImageSampleDrefImplicitLod); + + return vkd3d_spirv_build_image_instruction(builder, op, result_type, + operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); +@@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu + return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); + } + ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t op_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); ++} ++ ++static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, ++ uint32_t result_type, uint32_t val_id, uint32_t index_id) ++{ ++ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); ++ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, ++ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, index_id); ++} ++ + static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id) + { +@@ -1884,7 +1900,7 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, + } + else + { +- 
assert(component_type != VKD3D_SHADER_COMPONENT_VOID); ++ VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); + scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); + return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); + } +@@ -2250,7 +2266,7 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, + case VKD3DSPR_OUTPUT: + case VKD3DSPR_PATCHCONST: + symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; +- assert(!reg->idx_count || symbol->key.reg.idx != ~0u); ++ VKD3D_ASSERT(!reg->idx_count || symbol->key.reg.idx != ~0u); + break; + + case VKD3DSPR_IMMCONSTBUFFER: +@@ -2377,6 +2393,7 @@ struct ssa_register_info + struct spirv_compiler + { + struct vkd3d_spirv_builder spirv_builder; ++ const struct vsir_program *program; + + struct vkd3d_shader_message_context *message_context; + struct vkd3d_shader_location location; +@@ -2403,6 +2420,11 @@ struct spirv_compiler + struct vkd3d_push_constant_buffer_binding *push_constants; + const struct vkd3d_shader_spirv_target_info *spirv_target_info; + ++ struct ++ { ++ uint32_t buffer_id; ++ } *spirv_parameter_info; ++ + bool prolog_emitted; + struct shader_signature input_signature; + struct shader_signature output_signature; +@@ -2513,13 +2535,10 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, + struct vkd3d_shader_message_context *message_context, uint64_t config_flags) + { +- const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; +- const struct shader_signature *output_signature = &program->output_signature; + const struct vkd3d_shader_interface_info *shader_interface; + const struct vkd3d_shader_descriptor_offset_info *offset_info; + const struct vkd3d_shader_spirv_target_info *target_info; + struct spirv_compiler *compiler; +- unsigned int max_element_count; + unsigned int i; + + if (!(compiler = vkd3d_malloc(sizeof(*compiler)))) +@@ -2547,13 +2566,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p + compiler->spirv_target_info = target_info; + } + +- max_element_count = max(output_signature->element_count, patch_constant_signature->element_count); +- if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) +- { +- vkd3d_free(compiler); +- return NULL; +- } +- + vkd3d_spirv_builder_init(&compiler->spirv_builder, spirv_compiler_get_entry_point_name(compiler)); + + compiler->formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT +@@ -2893,7 +2905,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind + + if (is_uav_counter) + { +- assert(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); ++ VKD3D_ASSERT(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); + binding_offsets = compiler->offset_info.uav_counter_offsets; + for (i = 0; i < shader_interface->uav_counter_count; ++i) + { +@@ -3011,7 +3023,7 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int i; + +- assert(0 < component_count && component_count <= VKD3D_VEC4_SIZE); ++ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_VEC4_SIZE); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + + switch (component_type) +@@ -3052,7 +3064,7 @@ static uint32_t spirv_compiler_get_constant64(struct spirv_compiler 
*compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int i; + +- assert(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); ++ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + + if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE && component_type != VKD3D_SHADER_COMPONENT_UINT64) +@@ -3274,21 +3286,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil + return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); + } + +-static const struct vkd3d_shader_parameter *spirv_compiler_get_shader_parameter( +- struct spirv_compiler *compiler, enum vkd3d_shader_parameter_name name) +-{ +- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; +- unsigned int i; +- +- for (i = 0; info && i < info->parameter_count; ++i) +- { +- if (info->parameters[i].name == name) +- return &info->parameters[i]; +- } +- +- return NULL; +-} +- + static const struct vkd3d_spec_constant_info + { + enum vkd3d_shader_parameter_name name; +@@ -3298,6 +3295,7 @@ static const struct vkd3d_spec_constant_info + vkd3d_shader_parameters[] = + { + {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, ++ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, + }; + + static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) +@@ -3318,12 +3316,11 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com + { + if (!compiler->current_spec_constant_id) + { +- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; + unsigned int i, id = 0; + +- for (i = 0; info && i < info->parameter_count; ++i) ++ for (i = 0; i < compiler->program->parameter_count; ++i) + { +- const struct vkd3d_shader_parameter *current = &info->parameters[i]; ++ const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i]; + + if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) + id = max(current->u.specialization_constant.id + 1, id); +@@ -3336,7 +3333,7 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com + } + + static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, +- enum vkd3d_shader_parameter_name name, uint32_t spec_id) ++ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_spec_constant_info *info; +@@ -3345,7 +3342,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile + info = get_spec_constant_info(name); + default_value = info ? 
info->default_value : 0; + +- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ++ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); + vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); + +@@ -3364,7 +3361,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile + } + + static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, +- enum vkd3d_shader_parameter_name name, uint32_t spec_id) ++ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) + { + unsigned int i; + +@@ -3374,30 +3371,66 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler + return compiler->spec_constants[i].id; + } + +- return spirv_compiler_emit_spec_constant(compiler, name, spec_id); ++ return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); + } + +-static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler *compiler, +- enum vkd3d_shader_parameter_name name) ++static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) + { +- const struct vkd3d_shader_parameter *parameter; ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ unsigned int index = parameter - compiler->program->parameters; ++ uint32_t type_id, ptr_id, ptr_type_id; + +- if (!(parameter = spirv_compiler_get_shader_parameter(compiler, name))) ++ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); ++ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); ++ ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, ++ compiler->spirv_parameter_info[index].buffer_id, ++ spirv_compiler_get_constant_uint(compiler, 0)); ++ return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); ++} ++ ++static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, ++ enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type) ++{ ++ const struct vkd3d_shader_parameter1 *parameter; ++ ++ static const struct ++ { ++ enum vkd3d_data_type type; ++ } ++ type_map[] = ++ { ++ [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT}, ++ [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT}, ++ }; ++ ++ if (!(parameter = vsir_program_get_parameter(compiler->program, name))) + { + WARN("Unresolved shader parameter %#x.\n", name); + goto default_parameter; + } + ++ if (type_map[parameter->data_type].type != type) ++ ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type); ++ + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) +- return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); ++ { ++ if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) ++ return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); ++ else ++ return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); ++ } ++ + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) +- return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id); ++ return spirv_compiler_get_spec_constant(compiler, name, 
parameter->u.specialization_constant.id, type); ++ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) ++ return spirv_compiler_get_buffer_parameter(compiler, parameter, type); + + FIXME("Unhandled parameter type %#x.\n", parameter->type); + + default_parameter: + return spirv_compiler_get_spec_constant(compiler, +- name, spirv_compiler_alloc_spec_constant_id(compiler)); ++ name, spirv_compiler_alloc_spec_constant_id(compiler), type); + } + + static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, +@@ -3409,7 +3442,7 @@ static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *comp + uint32_t type_id, result_id; + unsigned int i; + +- assert(val_component_idx < val_component_count); ++ VKD3D_ASSERT(val_component_idx < val_component_count); + + type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); + if (val_component_count == 1) +@@ -3470,11 +3503,11 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, + struct vkd3d_symbol reg_symbol, *symbol; + struct rb_entry *entry; + +- assert(!register_is_constant_or_undef(reg)); ++ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); + + if (reg->type == VKD3DSPR_TEMP) + { +- assert(reg->idx[0].offset < compiler->temp_count); ++ VKD3D_ASSERT(reg->idx[0].offset < compiler->temp_count); + register_info->id = compiler->temp_id + reg->idx[0].offset; + register_info->storage_class = SpvStorageClassPrivate; + register_info->descriptor_array = NULL; +@@ -3605,7 +3638,7 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp + + if (reg->type == VKD3DSPR_CONSTBUFFER) + { +- assert(!reg->idx[0].rel_addr); ++ VKD3D_ASSERT(!reg->idx[0].rel_addr); + if (register_info->descriptor_array) + indexes[index_count++] = spirv_compiler_get_descriptor_index(compiler, reg, + register_info->descriptor_array, register_info->binding_base_idx, VKD3D_SHADER_RESOURCE_BUFFER); +@@ -3723,7 +3756,7 @@ static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) + { +- assert(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); ++ VKD3D_ASSERT(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); + components[component_idx++] = val_id; + } + } +@@ -3748,7 +3781,7 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil + uint32_t type_id; + unsigned int i; + +- assert(component_count <= ARRAY_SIZE(components)); ++ VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); + + for (i = 0; i < component_count; ++i) + { +@@ -3771,7 +3804,7 @@ static uint32_t spirv_compiler_emit_int_to_bool(struct spirv_compiler *compiler, + uint32_t type_id; + SpvOp op; + +- assert(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); ++ VKD3D_ASSERT(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + op = condition & VKD3D_SHADER_CONDITIONAL_OP_Z ? 
SpvOpIEqual : SpvOpINotEqual; +@@ -3901,7 +3934,7 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile + uint32_t values[VKD3D_VEC4_SIZE] = {0}; + unsigned int i, j; + +- assert(reg->type == VKD3DSPR_IMMCONST); ++ VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST); + + if (reg->dimension == VSIR_DIMENSION_SCALAR) + { +@@ -3929,7 +3962,7 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi + uint64_t values[VKD3D_DVEC2_SIZE] = {0}; + unsigned int i, j; + +- assert(reg->type == VKD3DSPR_IMMCONST64); ++ VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST64); + + if (reg->dimension == VSIR_DIMENSION_SCALAR) + { +@@ -3956,7 +3989,7 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t type_id; + +- assert(reg->type == VKD3DSPR_UNDEF); ++ VKD3D_ASSERT(reg->type == VKD3DSPR_UNDEF); + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); + return vkd3d_spirv_get_op_undef(builder, type_id); +@@ -3972,8 +4005,8 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, + enum vkd3d_shader_component_type component_type; + uint32_t skipped_component_mask; + +- assert(!register_is_constant_or_undef(reg)); +- assert(vsir_write_mask_component_count(write_mask) == 1); ++ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); ++ VKD3D_ASSERT(vsir_write_mask_component_count(write_mask) == 1); + + component_idx = vsir_write_mask_get_component_idx(write_mask); + component_idx = vsir_swizzle_get_component(swizzle, component_idx); +@@ -4096,8 +4129,8 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil + static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, + const struct vkd3d_shader_register *reg) + { +- assert(reg->idx[0].offset < compiler->ssa_register_count); +- assert(reg->idx_count == 1); ++ VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); ++ VKD3D_ASSERT(reg->idx_count == 1); + return &compiler->ssa_register_info[reg->idx[0].offset]; + } + +@@ -4105,7 +4138,7 @@ static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *co + const struct vkd3d_shader_register *reg, uint32_t val_id) + { + unsigned int i = reg->idx[0].offset; +- assert(i < compiler->ssa_register_count); ++ VKD3D_ASSERT(i < compiler->ssa_register_count); + compiler->ssa_register_info[i].data_type = reg->data_type; + compiler->ssa_register_info[i].id = val_id; + } +@@ -4125,10 +4158,10 @@ static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler + if (!val_id) + { + /* Should only be from a missing instruction implementation. 
*/ +- assert(compiler->failed); ++ VKD3D_ASSERT(compiler->failed); + return 0; + } +- assert(vkd3d_swizzle_is_scalar(swizzle, reg)); ++ VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); + + reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); + +@@ -4172,6 +4205,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, + return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); + else if (reg->type == VKD3DSPR_UNDEF) + return spirv_compiler_emit_load_undef(compiler, reg, write_mask); ++ else if (reg->type == VKD3DSPR_PARAMETER) ++ return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type); + + component_count = vsir_write_mask_component_count(write_mask); + component_type = vkd3d_component_type_from_data_type(reg->data_type); +@@ -4348,7 +4383,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, + unsigned int i, src_idx, dst_idx; + uint32_t type_id, dst_val_id; + +- assert(write_mask); ++ VKD3D_ASSERT(write_mask); + + component_count = vsir_write_mask_component_count(write_mask); + dst_component_count = vsir_write_mask_component_count(dst_write_mask); +@@ -4373,7 +4408,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, + type_id = vkd3d_spirv_get_type_id(builder, component_type, dst_component_count); + dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone); + +- assert(component_count <= ARRAY_SIZE(components)); ++ VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); + + for (i = 0, src_idx = 0, dst_idx = 0; dst_idx < VKD3D_VEC4_SIZE; ++dst_idx) + { +@@ -4402,7 +4437,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, + uint32_t src_write_mask = write_mask; + uint32_t type_id; + +- assert(!register_is_constant_or_undef(reg)); ++ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); + + if (reg->type == VKD3DSPR_SSA) + { +@@ -4461,7 +4496,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, + static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, + const struct vkd3d_shader_dst_param *dst, uint32_t val_id) + { +- assert(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); ++ VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); + if (dst->modifiers & VKD3DSPDM_SATURATE) + val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); + +@@ -4893,7 +4928,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler + { + struct vkd3d_shader_register r; + +- assert(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); ++ VKD3D_ASSERT(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); + + vsir_register_init(&r, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); + return spirv_compiler_get_register_id(compiler, &r); +@@ -5013,7 +5048,7 @@ static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *co + unsigned int sizes[2]; + uint32_t id; + +- assert(size_count <= ARRAY_SIZE(sizes)); ++ VKD3D_ASSERT(size_count <= ARRAY_SIZE(sizes)); + memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); + array_sizes = sizes; + sizes[0] = max(sizes[0], builtin->spirv_array_size); +@@ -5175,7 +5210,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, + use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_var ? 
VKD3DSP_WRITEMASK_ALL : reg_write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; +- assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); ++ VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); + spirv_compiler_put_symbol(compiler, ®_symbol); + + vkd3d_spirv_build_op_name(builder, var_id, reg_type == VKD3DSPR_PATCHCONST ? "vpc%u" : "v%u", element_idx); +@@ -5221,8 +5256,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, + uint32_t write_mask; + uint32_t input_id; + +- assert(!reg->idx_count || !reg->idx[0].rel_addr); +- assert(reg->idx_count < 2); ++ VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); ++ VKD3D_ASSERT(reg->idx_count < 2); + + if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { +@@ -5356,8 +5391,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, + uint32_t write_mask; + uint32_t output_id; + +- assert(!reg->idx_count || !reg->idx[0].rel_addr); +- assert(reg->idx_count < 2); ++ VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); ++ VKD3D_ASSERT(reg->idx_count < 2); + + if (!(builtin = get_spirv_builtin_for_register(reg->type))) + { +@@ -5543,7 +5578,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, + use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, + use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); + reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; +- assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); ++ VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); + + spirv_compiler_put_symbol(compiler, ®_symbol); + +@@ -5881,7 +5916,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t + function_location = spirv_compiler_get_current_function_location(compiler); + vkd3d_spirv_begin_function_stream_insertion(builder, function_location); + +- assert(!compiler->temp_count); ++ VKD3D_ASSERT(!compiler->temp_count); + compiler->temp_count = count; + for (i = 0; i < compiler->temp_count; ++i) + { +@@ -5889,7 +5924,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t + SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + if (!i) + compiler->temp_id = id; +- assert(id == compiler->temp_id + i); ++ VKD3D_ASSERT(id == compiler->temp_id + i); + + vkd3d_spirv_build_op_name(builder, id, "r%u", i); + } +@@ -5899,7 +5934,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t + + static void spirv_compiler_allocate_ssa_register_ids(struct spirv_compiler *compiler, unsigned int count) + { +- assert(!compiler->ssa_register_info); ++ VKD3D_ASSERT(!compiler->ssa_register_info); + if (!(compiler->ssa_register_info = vkd3d_calloc(count, sizeof(*compiler->ssa_register_info)))) + { + ERR("Failed to allocate SSA register value id array, count %u.\n", count); +@@ -6001,7 +6036,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com + vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 4); + descriptor_offsets_member_idx = j; + compiler->descriptor_offsets_member_id = spirv_compiler_get_constant_uint(compiler, j); +- assert(j == count - 1); ++ VKD3D_ASSERT(j == count - 1); + } + + struct_id = 
vkd3d_spirv_build_op_type_struct(builder, member_ids, count); +@@ -6488,7 +6523,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + + if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) + { +- assert(structure_stride); /* counters are valid only for structured buffers */ ++ VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ + + counter_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + if (spirv_compiler_is_opengl_target(compiler)) +@@ -6831,7 +6866,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + uint32_t function_id, void_id, function_type_id; + struct vkd3d_shader_phase *phase; + +- assert(compiler->phase != instruction->handler_idx); ++ VKD3D_ASSERT(compiler->phase != instruction->opcode); + + if (!is_in_default_phase(compiler)) + spirv_compiler_leave_shader_phase(compiler); +@@ -6843,16 +6878,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_function(builder, void_id, function_id, + SpvFunctionControlMaskNone, function_type_id); + +- compiler->phase = instruction->handler_idx; ++ compiler->phase = instruction->opcode; + spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); + +- phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ phase = (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) + ? &compiler->control_point_phase : &compiler->patch_constant_phase; + phase->function_id = function_id; + /* The insertion location must be set after the label is emitted. */ + phase->function_location = 0; + +- if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) ++ if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) + compiler->emit_default_control_point_phase = instruction->flags; + } + +@@ -6908,7 +6943,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile + input_reg.idx[1].offset = 0; + input_id = spirv_compiler_get_register_id(compiler, &input_reg); + +- assert(input_signature->element_count == output_signature->element_count); ++ VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); + for (i = 0; i < output_signature->element_count; ++i) + { + const struct signature_element *output = &output_signature->elements[i]; +@@ -6916,8 +6951,8 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile + struct vkd3d_shader_register_info output_reg_info; + struct vkd3d_shader_register output_reg; + +- assert(input->mask == output->mask); +- assert(input->component_type == output->component_type); ++ VKD3D_ASSERT(input->mask == output->mask); ++ VKD3D_ASSERT(input->component_type == output->component_type); + + input_reg.idx[1].offset = i; + input_id = spirv_compiler_get_register_id(compiler, &input_reg); +@@ -7016,7 +7051,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + { + static const struct + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + SpvOp spirv_op; + } + alu_ops[] = +@@ -7056,7 +7091,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + + for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) + { +- if (alu_ops[i].handler_idx == instruction->handler_idx) ++ if (alu_ops[i].opcode == instruction->opcode) + return alu_ops[i].spirv_op; + } + +@@ -7065,7 +7100,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru + + static SpvOp 
spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) + { +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_AND: + return SpvOpLogicalAnd; +@@ -7085,25 +7120,25 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t val_id; + +- assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); ++ VKD3D_ASSERT(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); + + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) + { +- val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); ++ val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); + } + else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) + { + /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ +- val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); ++ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); + } + else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) + { +- val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); ++ val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); + } + else if (dst->reg.data_type == VKD3D_DATA_UINT64) + { +- val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); ++ val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); + } + else + { +@@ -7126,7 +7161,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + SpvOp op = SpvOpMax; + unsigned int i; + +- if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) ++ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. */ + FIXME("Unsupported 64-bit source for bit count.\n"); +@@ -7142,8 +7177,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + /* VSIR supports logic ops AND/OR/XOR on bool values. */ + op = spirv_compiler_map_logical_instruction(instruction); + } +- else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF +- || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) ++ else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF ++ || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) + { + /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, + * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. 
*/ +@@ -7158,14 +7193,14 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + + if (op == SpvOpMax) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, +- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); ++ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); + return VKD3D_ERROR_INVALID_SHADER; + } + +- assert(instruction->dst_count == 1); +- assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); ++ VKD3D_ASSERT(instruction->dst_count == 1); ++ VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); + + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + +@@ -7179,8 +7214,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil + * Microsoft fxc will compile immediate constants larger than 5 bits. + * Fixing up the constants would be more elegant, but the simplest way is + * to let this handle constants too. */ +- if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL +- || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) ++ if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL ++ || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) + { + uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, + VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); +@@ -7218,7 +7253,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + { + static const struct + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + enum GLSLstd450 glsl_inst; + } + glsl_insts[] = +@@ -7258,7 +7293,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( + + for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) + { +- if (glsl_insts[i].handler_idx == instruction->handler_idx) ++ if (glsl_insts[i].opcode == instruction->opcode) + return glsl_insts[i].glsl_inst; + } + +@@ -7276,27 +7311,27 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp + unsigned int i, component_count; + enum GLSLstd450 glsl_inst; + +- if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI +- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) ++ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI ++ || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ +- FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); ++ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, +- "64-bit source for handler %#x is not supported.", instruction->handler_idx); ++ "64-bit source for handler %#x is not supported.", instruction->opcode); + return; + } + + glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); + if (glsl_inst == GLSLstd450Bad) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + + instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); + +- assert(instruction->dst_count == 1); +- assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); ++ VKD3D_ASSERT(instruction->dst_count == 1); ++ VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); + + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + +@@ -7306,8 +7341,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp + val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, + instr_set_id, glsl_inst, src_id, instruction->src_count); + +- if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI +- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) ++ if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI ++ || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) + { + /* In D3D bits are numbered from the most significant bit. */ + component_count = vsir_write_mask_component_count(dst->write_mask); +@@ -7415,7 +7450,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, + + if (src[0].reg.data_type != VKD3D_DATA_BOOL) + { +- if (instruction->handler_idx == VKD3DSIH_CMP) ++ if (instruction->opcode == VKD3DSIH_CMP) + condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, + vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, + spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); +@@ -7437,7 +7472,7 @@ static void spirv_compiler_emit_swapc(struct spirv_compiler *compiler, + uint32_t condition_id, src1_id, src2_id, type_id, val_id; + unsigned int component_count; + +- assert(dst[0].write_mask == dst[1].write_mask); ++ VKD3D_ASSERT(dst[0].write_mask == dst[1].write_mask); + + condition_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); +@@ -7469,14 +7504,14 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, + component_count = vsir_write_mask_component_count(dst->write_mask); + component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); + +- if (instruction->handler_idx == VKD3DSIH_DP4) ++ if (instruction->opcode == VKD3DSIH_DP4) + write_mask = VKD3DSP_WRITEMASK_ALL; +- else if (instruction->handler_idx == VKD3DSIH_DP3) ++ else if (instruction->opcode == VKD3DSIH_DP3) + write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; + else + write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; + +- assert(instruction->src_count == ARRAY_SIZE(src_ids)); ++ VKD3D_ASSERT(instruction->src_count == ARRAY_SIZE(src_ids)); + for (i = 0; i < ARRAY_SIZE(src_ids); ++i) + src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], write_mask); + +@@ -7606,8 +7641,8 @@ static void spirv_compiler_emit_int_div(struct spirv_compiler *compiler, + unsigned int component_count 
= 0; + SpvOp div_op, mod_op; + +- div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; +- mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; ++ div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; ++ mod_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; + + if (dst[0].reg.type != VKD3DSPR_NULL) + { +@@ -7668,8 +7703,8 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, + enum vkd3d_shader_component_type component_type; + unsigned int component_count; + +- assert(instruction->dst_count == 1); +- assert(instruction->src_count == 1); ++ VKD3D_ASSERT(instruction->dst_count == 1); ++ VKD3D_ASSERT(instruction->src_count == 1); + + /* OpConvertFToI has undefined results if the result cannot be represented + * as a signed integer, but Direct3D expects the result to saturate, +@@ -7721,8 +7756,8 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, + uint32_t src_type_id, dst_type_id, condition_type_id; + unsigned int component_count; + +- assert(instruction->dst_count == 1); +- assert(instruction->src_count == 1); ++ VKD3D_ASSERT(instruction->dst_count == 1); ++ VKD3D_ASSERT(instruction->src_count == 1); + + /* OpConvertFToU has undefined results if the result cannot be represented + * as an unsigned integer, but Direct3D expects the result to saturate, +@@ -7770,7 +7805,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp + SpvOp op; + + src_count = instruction->src_count; +- assert(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); ++ VKD3D_ASSERT(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); + + component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); + type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); +@@ -7778,17 +7813,17 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp + mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); + size_id = spirv_compiler_get_constant_uint(compiler, size); + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; + case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; + case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; + default: +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, k = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) +@@ -7832,7 +7867,7 @@ static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, + scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); + + /* FIXME: Consider a single UnpackHalf2x16 instruction per 2 components. */ +- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) +@@ -7866,7 +7901,7 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, + zero_id = spirv_compiler_get_constant_float(compiler, 0.0f); + + /* FIXME: Consider a single PackHalf2x16 instruction per 2 components. 
*/ +- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) +@@ -7895,7 +7930,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + unsigned int component_count; + SpvOp op; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DEQO: + case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; +@@ -7916,7 +7951,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co + case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; + case VKD3DSIH_ULT: op = SpvOpULessThan; break; + default: +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -7949,7 +7984,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c + src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); + src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); + val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); +- if (instruction->handler_idx == VKD3DSIH_ORD) ++ if (instruction->opcode == VKD3DSIH_ORD) + val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); + } +@@ -7964,7 +7999,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil + unsigned int component_count; + SpvOp op; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; + case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; +@@ -8113,6 +8148,8 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, + if (src->reg.data_type != VKD3D_DATA_BOOL) + condition_id = spirv_compiler_emit_int_to_bool(compiler, + instruction->flags, src->reg.data_type, 1, condition_id); ++ else if (instruction->flags & VKD3D_SHADER_CONDITIONAL_OP_Z) ++ condition_id = vkd3d_spirv_build_op_logical_not(builder, vkd3d_spirv_get_op_type_bool(builder), condition_id); + void_id = vkd3d_spirv_get_op_type_void(builder); + vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), + &condition_id, 1); +@@ -8262,7 +8299,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile + + static const struct instruction_info + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + SpvOp op; + bool needs_derivative_control; + } +@@ -8279,7 +8316,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile + info = NULL; + for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) + { +- if (deriv_instructions[i].handler_idx == instruction->handler_idx) ++ if (deriv_instructions[i].opcode == instruction->opcode) + { + info = &deriv_instructions[i]; + break; +@@ -8287,15 +8324,15 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile + } + if (!info) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + + if (info->needs_derivative_control) + vkd3d_spirv_enable_capability(builder, SpvCapabilityDerivativeControl); + +- assert(instruction->dst_count == 1); +- assert(instruction->src_count == 1); ++ VKD3D_ASSERT(instruction->dst_count == 1); ++ 
VKD3D_ASSERT(instruction->src_count == 1); + + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); +@@ -8329,7 +8366,7 @@ static const struct vkd3d_symbol *spirv_compiler_find_resource(struct spirv_comp + + vkd3d_symbol_make_resource(&resource_key, resource_reg); + entry = rb_get(&compiler->symbol_table, &resource_key); +- assert(entry); ++ VKD3D_ASSERT(entry); + return RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); + } + +@@ -8438,8 +8475,8 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, + { + struct vkd3d_shader_register_info register_info; + +- assert(image->image_id); +- assert(sampler_reg); ++ VKD3D_ASSERT(image->image_id); ++ VKD3D_ASSERT(sampler_reg); + + if (!spirv_compiler_get_register_info(compiler, sampler_reg, ®ister_info)) + ERR("Failed to get sampler register info.\n"); +@@ -8497,7 +8534,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, + uint32_t coordinate_mask; + bool multisample; + +- multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; ++ multisample = instruction->opcode == VKD3DSIH_LD2DMS; + + spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); + +@@ -8522,7 +8559,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, + image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, + &src[2], VKD3DSP_WRITEMASK_0); + } +- assert(image_operand_count <= ARRAY_SIZE(image_operands)); ++ VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); + val_id = vkd3d_spirv_build_op_image_fetch(builder, type_id, + image.image_id, coordinate_id, operands_mask, image_operands, image_operand_count); + +@@ -8576,7 +8613,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, + spirv_compiler_prepare_image(compiler, &image, + &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_SAMPLE: + op = SpvOpImageSampleImplicitLod; +@@ -8603,7 +8640,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, + &src[3], VKD3DSP_WRITEMASK_0); + break; + default: +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -8616,7 +8653,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, + + sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); + coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); +- assert(image_operand_count <= ARRAY_SIZE(image_operands)); ++ VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); + val_id = vkd3d_spirv_build_op_image_sample(builder, op, sampled_type_id, + image.sampled_image_id, coordinate_id, operands_mask, image_operands, image_operand_count); + +@@ -8637,7 +8674,7 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, + uint32_t image_operands[2]; + SpvOp op; + +- if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) ++ if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) + { + op = SpvOpImageSampleDrefExplicitLod; + operands_mask |= SpvImageOperandsLodMask; +@@ -8687,12 +8724,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, + uint32_t coordinate_mask; + bool extended_offset; + +- if (instruction->handler_idx == VKD3DSIH_GATHER4_C +- || instruction->handler_idx == 
VKD3DSIH_GATHER4_PO_C) ++ if (instruction->opcode == VKD3DSIH_GATHER4_C ++ || instruction->opcode == VKD3DSIH_GATHER4_PO_C) + image_flags |= VKD3D_IMAGE_FLAG_DEPTH; + +- extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO +- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; ++ extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO ++ || instruction->opcode == VKD3DSIH_GATHER4_PO_C; + + addr = &src[0]; + offset = extended_offset ? &src[1] : NULL; +@@ -8801,7 +8838,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler + type_id, resource_symbol->info.resource.structure_stride, + &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) +@@ -8833,7 +8870,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler + type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + texel_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); +- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) +@@ -8876,7 +8913,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); ++ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) +@@ -8939,7 +8976,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * + &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + data = &src[instruction->src_count - 1]; +- assert(data->reg.data_type == VKD3D_DATA_UINT); ++ VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); + val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); + + component_count = vsir_write_mask_component_count(dst->write_mask); +@@ -8963,12 +9000,11 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * + { + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); +- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + + data = &src[instruction->src_count - 1]; +- assert(data->reg.data_type == VKD3D_DATA_UINT); ++ VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); + val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); + + component_count = vsir_write_mask_component_count(dst->write_mask); +@@ -9007,7 +9043,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); +- assert((instruction->handler_idx == 
VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); + base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); + +@@ -9145,12 +9180,12 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c + uint32_t operands[3]; + SpvOp op; + +- op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC ++ op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC + ? SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; + + resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); + counter_id = resource_symbol->info.resource.uav_counter_id; +- assert(counter_id); ++ VKD3D_ASSERT(counter_id); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + +@@ -9211,7 +9246,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins + { + static const struct + { +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + SpvOp spirv_op; + } + atomic_ops[] = +@@ -9240,16 +9275,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins + + for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) + { +- if (atomic_ops[i].handler_idx == instruction->handler_idx) ++ if (atomic_ops[i].opcode == instruction->opcode) + return atomic_ops[i].spirv_op; + } + + return SpvOpMax; + } + +-static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) ++static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) + { +- return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; ++ return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; + } + + static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, +@@ -9274,12 +9309,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + bool raw; + SpvOp op; + +- resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; ++ resource = is_imm_atomic_instruction(instruction->opcode) ? 
&dst[1] : &dst[0]; + + op = spirv_compiler_map_atomic_instruction(instruction); + if (op == SpvOpMax) + { +- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); ++ ERR("Unexpected instruction %#x.\n", instruction->opcode); + return; + } + +@@ -9315,14 +9350,14 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + if (structure_stride || raw) + { +- assert(!raw != !structure_stride); ++ VKD3D_ASSERT(!raw != !structure_stride); + coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, + type_id, structure_stride, &src[0], VKD3DSP_WRITEMASK_0, + &src[0], VKD3DSP_WRITEMASK_1); + } + else + { +- assert(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); ++ VKD3D_ASSERT(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); + coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], coordinate_mask); + } + +@@ -9360,7 +9395,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + { + WARN("Ignoring 'volatile' attribute.\n"); + spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, +- "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); ++ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); + } + + memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) +@@ -9379,7 +9414,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil + result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, + op, type_id, operands, i); + +- if (is_imm_atomic_instruction(instruction->handler_idx)) ++ if (is_imm_atomic_instruction(instruction->opcode)) + spirv_compiler_emit_store_dst(compiler, dst, result_id); + } + +@@ -9511,8 +9546,8 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co + + if (src->reg.type == VKD3DSPR_RASTERIZER) + { +- val_id = spirv_compiler_emit_uint_shader_parameter(compiler, +- VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); ++ val_id = spirv_compiler_emit_shader_parameter(compiler, ++ VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT); + } + else + { +@@ -9684,13 +9719,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, + + src_ids[src_count++] = register_info.id; + +- if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) ++ if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) + { + op = GLSLstd450InterpolateAtCentroid; + } + else + { +- assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); ++ VKD3D_ASSERT(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); + op = GLSLstd450InterpolateAtSample; + src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); + } +@@ -9772,7 +9807,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int stream_idx; + +- if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) ++ if (instruction->opcode == VKD3DSIH_EMIT_STREAM) + stream_idx = instruction->src[0].reg.idx[0].offset; + else + stream_idx = 0; +@@ -9793,7 +9828,7 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int stream_idx; + +- if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) ++ if (instruction->opcode == VKD3DSIH_CUT_STREAM) + stream_idx = 
instruction->src[0].reg.idx[0].offset; + else + stream_idx = 0; +@@ -9807,9 +9842,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_end_primitive(builder); + } + +-static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) ++static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) + { +- switch (handler_idx) ++ switch (opcode) ++ { ++ case VKD3DSIH_QUAD_READ_ACROSS_X: ++ return 0; ++ case VKD3DSIH_QUAD_READ_ACROSS_Y: ++ return 1; ++ case VKD3DSIH_QUAD_READ_ACROSS_D: ++ return 2; ++ default: ++ vkd3d_unreachable(); ++ } ++} ++ ++static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, direction_type_id, direction_id, val_id; ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); ++ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); ++ direction_id = map_quad_read_across_direction(instruction->opcode); ++ direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compiler, ++ const struct vkd3d_shader_instruction *instruction) ++{ ++ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; ++ const struct vkd3d_shader_dst_param *dst = instruction->dst; ++ const struct vkd3d_shader_src_param *src = instruction->src; ++ uint32_t type_id, val_id, lane_id; ++ ++ if (!register_is_constant_or_undef(&src[1].reg)) ++ { ++ FIXME("Unsupported non-constant quad read lane index.\n"); ++ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, ++ "Non-constant quad read lane indices are not supported."); ++ return; ++ } ++ ++ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, ++ vsir_write_mask_component_count(dst->write_mask)); ++ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); ++ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); ++ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); ++ ++ spirv_compiler_emit_store_dst(compiler, dst, val_id); ++} ++ ++static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) ++{ ++ switch (opcode) + { + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + return SpvOpGroupNonUniformAllEqual; +@@ -9833,7 +9927,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, + + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); + +- op = map_wave_bool_op(instruction->handler_idx); ++ op = map_wave_bool_op(instruction->opcode); + type_id = vkd3d_spirv_get_op_type_bool(builder); + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, +@@ -9865,9 +9959,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil + 
spirv_compiler_emit_store_dst(compiler, dst, val_id); + } + +-static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) ++static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) + { +- switch (handler_idx) ++ switch (opcode) + { + case VKD3DSIH_WAVE_ACTIVE_BIT_AND: + return SpvOpGroupNonUniformBitwiseAnd; +@@ -9905,7 +9999,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, + uint32_t type_id, val_id; + SpvOp op; + +- op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); ++ op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); +@@ -9928,7 +10022,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, + SpvGroupOperation group_op; + uint32_t type_id, val_id; + +- group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan ++ group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan + : SpvGroupOperationReduce; + + val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); +@@ -10014,7 +10108,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + + compiler->location = instruction->location; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_GLOBAL_FLAGS: + spirv_compiler_emit_dcl_global_flags(compiler, instruction); +@@ -10337,6 +10431,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_CUT_STREAM: + spirv_compiler_emit_cut_stream(compiler, instruction); + break; ++ case VKD3DSIH_QUAD_READ_ACROSS_D: ++ case VKD3DSIH_QUAD_READ_ACROSS_X: ++ case VKD3DSIH_QUAD_READ_ACROSS_Y: ++ spirv_compiler_emit_quad_read_across(compiler, instruction); ++ break; ++ case VKD3DSIH_QUAD_READ_LANE_AT: ++ spirv_compiler_emit_quad_read_lane_at(compiler, instruction); ++ break; + case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: + case VKD3DSIH_WAVE_ALL_TRUE: + case VKD3DSIH_WAVE_ANY_TRUE: +@@ -10371,7 +10473,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_WAVE_READ_LANE_FIRST: + spirv_compiler_emit_wave_read_lane_first(compiler, instruction); + break; +- case VKD3DSIH_DCL: + case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: + case VKD3DSIH_DCL_INPUT_SGV: +@@ -10381,7 +10482,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + case VKD3DSIH_DCL_OUTPUT_SIV: + case VKD3DSIH_DCL_RESOURCE_RAW: + case VKD3DSIH_DCL_RESOURCE_STRUCTURED: +- case VKD3DSIH_DCL_SAMPLER: + case VKD3DSIH_DCL_UAV_RAW: + case VKD3DSIH_DCL_UAV_STRUCTURED: + case VKD3DSIH_DCL_UAV_TYPED: +@@ -10390,9 +10490,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, + /* nothing to do */ + break; + default: +- FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); ++ FIXME("Unhandled instruction %#x.\n", instruction->opcode); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, +- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); ++ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); + break; + } + +@@ -10476,12 +10576,16 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler 
*compiler, struct + struct vkd3d_shader_instruction_array instructions; + enum vkd3d_shader_spirv_environment environment; + enum vkd3d_result result = VKD3D_OK; +- unsigned int i; ++ unsigned int i, max_element_count; + + if ((result = vsir_program_normalise(program, compiler->config_flags, + compile_info, compiler->message_context)) < 0) + return result; + ++ max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); ++ if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ + if (program->temp_count) + spirv_compiler_emit_temps(compiler, program->temp_count); + if (program->ssa_count) +@@ -10489,9 +10593,38 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct + + spirv_compiler_emit_descriptor_declarations(compiler); + ++ compiler->spirv_parameter_info = vkd3d_calloc(program->parameter_count, sizeof(*compiler->spirv_parameter_info)); ++ for (i = 0; i < program->parameter_count; ++i) ++ { ++ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; ++ ++ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) ++ { ++ uint32_t type_id, struct_id, ptr_type_id, var_id; ++ ++ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ++ ++ struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); ++ vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); ++ vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, ++ SpvDecorationOffset, parameter->u.buffer.offset); ++ ++ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, struct_id); ++ var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, ++ ptr_type_id, SpvStorageClassUniform, 0); ++ ++ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationDescriptorSet, parameter->u.buffer.set); ++ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationBinding, parameter->u.buffer.binding); ++ ++ compiler->spirv_parameter_info[i].buffer_id = var_id; ++ } ++ } ++ + if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ++ compiler->program = program; ++ + instructions = program->instructions; + memset(&program->instructions, 0, sizeof(program->instructions)); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index b562e815a81..d6d5bbc1c07 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -780,7 +780,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) + { + FIXME("Ignoring shader data type %#x.\n", type); +- ins->handler_idx = VKD3DSIH_NOP; ++ ins->opcode = VKD3DSIH_NOP; + return; + } + +@@ -789,7 +789,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + if (icb_size % 4) + { + FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + +@@ -797,7 +797,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui + { + ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); + vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; 
+ return; + } + icb->register_idx = 0; +@@ -1716,7 +1716,7 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( + const struct vkd3d_sm4_register_type_info *register_type_info = + get_info_from_vkd3d_register_type(lookup, vkd3d_type); + +- assert(register_type_info); ++ VKD3D_ASSERT(register_type_info); + return register_type_info->default_src_swizzle_type; + } + +@@ -2395,16 +2395,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) + { + FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + *ptr += len; + return; + } + + vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); +- if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE +- || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) +- sm4->phase = ins->handler_idx; +- sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; ++ if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE ++ || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) ++ sm4->phase = ins->opcode; ++ sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; + ins->flags = 0; + ins->coissue = false; + ins->raw = false; +@@ -2417,7 +2417,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + { + ERR("Failed to allocate src parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; +@@ -2459,7 +2459,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + { + ERR("Failed to allocate dst parameters.\n"); + vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + for (i = 0; i < ins->dst_count; ++i) +@@ -2467,7 +2467,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), + &dst_params[i]))) + { +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + dst_params[i].modifiers |= instruction_dst_modifier; +@@ -2478,7 +2478,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), + &src_params[i]))) + { +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + } +@@ -2488,12 +2488,12 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + + fail: + *ptr = sm4->end; +- ins->handler_idx = VKD3DSIH_INVALID; ++ ins->opcode = VKD3DSIH_INVALID; + return; + } + + static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, +- const uint32_t *byte_code, size_t byte_code_size, const char *source_name, ++ const uint32_t *byte_code, size_t byte_code_size, const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) + { + struct vkd3d_shader_version version; +@@ -2552,9 +2552,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, 
struct vsir_pro + version.minor = VKD3D_SM4_VERSION_MINOR(version_token); + + /* Estimate instruction count to avoid reallocation in most shaders. */ +- if (!vsir_program_init(program, &version, token_count / 7u + 20)) ++ if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) + return false; +- vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); ++ vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); + sm4->ptr = sm4->start; + + init_sm4_lookup_tables(&sm4->lookup); +@@ -2651,7 +2651,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + } + + if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, +- compile_info->source_name, message_context)) ++ compile_info, message_context)) + { + WARN("Failed to initialise shader parser.\n"); + free_dxbc_shader_desc(&dxbc_desc); +@@ -2693,7 +2693,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + ins = &instructions->elements[instructions->count]; + shader_sm4_read_instruction(&sm4, ins); + +- if (ins->handler_idx == VKD3DSIH_INVALID) ++ if (ins->opcode == VKD3DSIH_INVALID) + { + WARN("Encountered unrecognized or invalid instruction.\n"); + vsir_program_cleanup(program); +@@ -2762,6 +2762,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, ++ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, + }; + + for (i = 0; i < ARRAY_SIZE(register_table); ++i) +@@ -2817,6 +2818,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, ++ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, + + {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, + {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, +@@ -2885,7 +2887,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + continue; + + ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); +- assert(ret); ++ VKD3D_ASSERT(ret); + if (usage == ~0u) + continue; + usage_idx = var->semantic.index; +@@ -2896,7 +2898,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + } + else + { +- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; + } + +@@ -2973,7 +2975,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + switch (type->class) + { + case HLSL_CLASS_MATRIX: +- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); ++ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) + return D3D_SVC_MATRIX_COLUMNS; + else +@@ -2984,11 +2986,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + return D3D_SVC_VECTOR; + + case HLSL_CLASS_ARRAY: ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_STRUCT: + case HLSL_CLASS_PASS: + case 
HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: +@@ -2997,6 +3001,11 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_VOID: ++ case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_COMPUTE_SHADER: ++ case HLSL_CLASS_DOMAIN_SHADER: ++ case HLSL_CLASS_HULL_SHADER: ++ case HLSL_CLASS_GEOMETRY_SHADER: + break; + } + vkd3d_unreachable(); +@@ -3077,7 +3086,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b + } + else + { +- assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); ++ VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); + put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); + put_u32(buffer, vkd3d_make_u32(array_size, 0)); +@@ -3178,13 +3187,14 @@ struct extern_resource + /* var is only not NULL if this resource is a whole variable, so it may be responsible for more + * than one component. */ + const struct hlsl_ir_var *var; ++ const struct hlsl_buffer *buffer; + + char *name; + struct hlsl_type *data_type; + bool is_user_packed; + + enum hlsl_regset regset; +- unsigned int id, bind_count; ++ unsigned int id, space, index, bind_count; + }; + + static int sm4_compare_extern_resources(const void *a, const void *b) +@@ -3196,7 +3206,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) + if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) + return r; + +- return vkd3d_u32_compare(aa->id, bb->id); ++ if ((r = vkd3d_u32_compare(aa->space, bb->space))) ++ return r; ++ ++ return vkd3d_u32_compare(aa->index, bb->index); + } + + static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) +@@ -3220,6 +3233,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; + struct extern_resource *extern_resources = NULL; + const struct hlsl_ir_var *var; ++ struct hlsl_buffer *buffer; + enum hlsl_regset regset; + size_t capacity = 0; + char *name; +@@ -3272,13 +3286,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + hlsl_release_string_buffer(ctx, name_buffer); + + extern_resources[*count].var = NULL; ++ extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = component_type; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = regset; +- extern_resources[*count].id = var->regs[regset].id + regset_offset; ++ extern_resources[*count].id = var->regs[regset].id; ++ extern_resources[*count].space = var->regs[regset].space; ++ extern_resources[*count].index = var->regs[regset].index + regset_offset; + extern_resources[*count].bind_count = 1; + + ++*count; +@@ -3313,13 +3330,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + } + + extern_resources[*count].var = var; ++ extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = var->data_type; +- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; ++ /* For some reason 5.1 resources aren't marked as ++ * user-packed, but 
cbuffers still are. */ ++ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) ++ && !!var->reg_reservation.reg_type; + + extern_resources[*count].regset = r; + extern_resources[*count].id = var->regs[r].id; ++ extern_resources[*count].space = var->regs[r].space; ++ extern_resources[*count].index = var->regs[r].index; + extern_resources[*count].bind_count = var->bind_count[r]; + + ++*count; +@@ -3327,14 +3350,51 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + } + } + ++ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) ++ { ++ if (!buffer->reg.allocated) ++ continue; ++ ++ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, ++ sizeof(*extern_resources)))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ if (!(name = hlsl_strdup(ctx, buffer->name))) ++ { ++ sm4_free_extern_resources(extern_resources, *count); ++ *count = 0; ++ return NULL; ++ } ++ ++ extern_resources[*count].var = NULL; ++ extern_resources[*count].buffer = buffer; ++ ++ extern_resources[*count].name = name; ++ extern_resources[*count].data_type = NULL; ++ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; ++ ++ extern_resources[*count].regset = HLSL_REGSET_NUMERIC; ++ extern_resources[*count].id = buffer->reg.id; ++ extern_resources[*count].space = buffer->reg.space; ++ extern_resources[*count].index = buffer->reg.index; ++ extern_resources[*count].bind_count = 1; ++ ++ ++*count; ++ } ++ + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); + return extern_resources; + } + + static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + { +- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; ++ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); + size_t cbuffers_offset, resources_offset, creator_offset, string_offset; ++ unsigned int cbuffer_count = 0, extern_resources_count, i, j; + size_t cbuffer_position, resource_position, creator_position; + const struct hlsl_profile_info *profile = ctx->profile; + struct vkd3d_bytecode_buffer buffer = {0}; +@@ -3354,19 +3414,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + +- resource_count += extern_resources_count; + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- { + ++cbuffer_count; +- ++resource_count; +- } + } + + put_u32(&buffer, cbuffer_count); + cbuffer_position = put_u32(&buffer, 0); +- put_u32(&buffer, resource_count); ++ put_u32(&buffer, extern_resources_count); + resource_position = put_u32(&buffer, 0); + put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), + target_types[profile->type])); +@@ -3378,7 +3434,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); + put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ + put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ +- put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ ++ put_u32(&buffer, binding_desc_size); /* size of binding desc */ + put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ + put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ + put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ +@@ -3395,21 +3451,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + const struct extern_resource *resource = &extern_resources[i]; + uint32_t flags = 0; + +- if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); +- + if (resource->is_user_packed) + flags |= D3D_SIF_USERPACKED; + + put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, sm4_resource_type(resource->data_type)); +- if (resource->regset == HLSL_REGSET_SAMPLERS) +- { +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- put_u32(&buffer, 0); +- } ++ if (resource->buffer) ++ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + else ++ put_u32(&buffer, sm4_resource_type(resource->data_type)); ++ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) + { + unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; + +@@ -3418,32 +3468,21 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } +- put_u32(&buffer, resource->id); ++ else ++ { ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ put_u32(&buffer, 0); ++ } ++ put_u32(&buffer, resource->index); + put_u32(&buffer, resource->bind_count); + put_u32(&buffer, flags); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- uint32_t flags = 0; +- +- if (!cbuffer->reg.allocated) +- continue; + + if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); +- +- if (cbuffer->reservation.reg_type) +- flags |= D3D_SIF_USERPACKED; +- +- put_u32(&buffer, 0); /* name */ +- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); +- put_u32(&buffer, 0); /* return type */ +- put_u32(&buffer, 0); /* dimension */ +- put_u32(&buffer, 0); /* multisample count */ +- put_u32(&buffer, cbuffer->reg.id); /* bind point */ +- put_u32(&buffer, 1); /* bind count */ +- put_u32(&buffer, flags); /* flags */ ++ { ++ put_u32(&buffer, resource->space); ++ put_u32(&buffer, resource->id); ++ } + } + + for (i = 0; i < extern_resources_count; ++i) +@@ -3451,16 +3490,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + const struct extern_resource *resource = &extern_resources[i]; + + string_offset = put_string(&buffer, resource->name); +- set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); +- } +- +- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) +- { +- if (!cbuffer->reg.allocated) +- continue; +- +- string_offset = put_string(&buffer, cbuffer->name); +- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); ++ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); + } + + /* Buffers. 
*/ +@@ -3522,7 +3552,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); + put_u32(&buffer, flags); + put_u32(&buffer, 0); /* type */ +- put_u32(&buffer, 0); /* FIXME: default value */ ++ put_u32(&buffer, 0); /* default value */ + + if (profile->major_version >= 5) + { +@@ -3546,6 +3576,34 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + set_u32(&buffer, var_offset, string_offset); + write_sm4_type(ctx, &buffer, var->data_type); + set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); ++ ++ if (var->default_values) ++ { ++ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; ++ unsigned int comp_count = hlsl_type_component_count(var->data_type); ++ unsigned int default_value_offset; ++ unsigned int k; ++ ++ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); ++ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); ++ ++ for (k = 0; k < comp_count; ++k) ++ { ++ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); ++ unsigned int comp_offset; ++ enum hlsl_regset regset; ++ ++ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); ++ if (regset == HLSL_REGSET_NUMERIC) ++ { ++ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) ++ hlsl_fixme(ctx, &var->loc, "Write double default values."); ++ ++ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), ++ var->default_values[k].value.u); ++ } ++ } ++ } + ++j; + } + } +@@ -3611,9 +3669,9 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod + switch (imod->type) + { + case VKD3D_SM4_MODIFIER_AOFFIMMI: +- assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); +- assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); +- assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); ++ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); ++ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); ++ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); + word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; + word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; +@@ -3652,7 +3710,7 @@ struct sm4_instruction + static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, + const struct hlsl_ir_node *instr) + { +- assert(instr->reg.allocated); ++ VKD3D_ASSERT(instr->reg.allocated); + reg->type = VKD3DSPR_TEMP; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = instr->reg.id; +@@ -3671,7 +3729,7 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s + reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; + reg->dimension = VSIR_DIMENSION_VEC4; + +- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); ++ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); + + if (!var->indexable) + { +@@ -3690,13 +3748,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s + struct vkd3d_shader_src_param *idx_src; + unsigned int idx_writemask; + +- assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); ++ VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); + idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; + 
memset(idx_src, 0, sizeof(*idx_src)); + + reg->idx[1].rel_addr = idx_src; + sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); +- assert(idx_writemask != 0); ++ VKD3D_ASSERT(idx_writemask != 0); + idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); + } + } +@@ -3720,42 +3778,79 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + reg->type = VKD3DSPR_RESOURCE; + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- assert(regset == HLSL_REGSET_TEXTURES); +- reg->idx_count = 1; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_UAVS) + { + reg->type = VKD3DSPR_UAV; + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- assert(regset == HLSL_REGSET_UAVS); +- reg->idx_count = 1; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else if (regset == HLSL_REGSET_SAMPLERS) + { + reg->type = VKD3DSPR_SAMPLER; + reg->dimension = VSIR_DIMENSION_NONE; +- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; +- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); +- assert(regset == HLSL_REGSET_SAMPLERS); +- reg->idx_count = 1; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; ++ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ ++ reg->idx_count = 2; ++ } ++ else ++ { ++ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; ++ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); ++ reg->idx_count = 1; ++ } ++ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); + *writemask = VKD3DSP_WRITEMASK_ALL; + } + else + { + unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; + +- assert(data_type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); + reg->type = VKD3DSPR_CONSTBUFFER; + reg->dimension = VSIR_DIMENSION_VEC4; +- reg->idx[0].offset = var->buffer->reg.id; +- reg->idx[1].offset = offset / 4; +- reg->idx_count = 2; ++ if (hlsl_version_ge(ctx, 5, 1)) ++ { ++ reg->idx[0].offset = var->buffer->reg.id; ++ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ ++ reg->idx[2].offset = offset / 4; ++ reg->idx_count = 3; ++ } ++ else ++ { ++ reg->idx[0].offset = var->buffer->reg.index; ++ reg->idx[1].offset = offset / 4; ++ reg->idx_count = 2; ++ } + *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); + } + } +@@ -3780,7 +3875,7 @@ static void 
sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + +- assert(hlsl_reg.allocated); ++ VKD3D_ASSERT(hlsl_reg.allocated); + reg->type = VKD3DSPR_INPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; +@@ -3812,7 +3907,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re + { + struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); + +- assert(hlsl_reg.allocated); ++ VKD3D_ASSERT(hlsl_reg.allocated); + reg->type = VKD3DSPR_OUTPUT; + reg->dimension = VSIR_DIMENSION_VEC4; + reg->idx[0].offset = hlsl_reg.id; +@@ -3948,7 +4043,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v + switch (sm4_swizzle_type) + { + case VKD3D_SM4_SWIZZLE_NONE: +- assert(sm4_swizzle || register_is_constant(reg)); ++ VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg)); + token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; + break; + +@@ -3980,16 +4075,16 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct + const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; + uint32_t idx_src_token; + +- assert(idx_src); +- assert(!idx_src->modifiers); +- assert(idx_src->reg.type != VKD3DSPR_IMMCONST); ++ VKD3D_ASSERT(idx_src); ++ VKD3D_ASSERT(!idx_src->modifiers); ++ VKD3D_ASSERT(idx_src->reg.type != VKD3DSPR_IMMCONST); + idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle); + + put_u32(buffer, idx_src_token); + for (k = 0; k < idx_src->reg.idx_count; ++k) + { + put_u32(buffer, idx_src->reg.idx[k].offset); +- assert(!idx_src->reg.idx[k].rel_addr); ++ VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); + } + } + else +@@ -4139,18 +4234,36 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, + + static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) + { +- const struct sm4_instruction instr = ++ size_t size = (cbuffer->used_size + 3) / 4; ++ ++ struct sm4_instruction instr = + { + .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, + + .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, + .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, +- .srcs[0].reg.idx[0].offset = cbuffer->reg.id, +- .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, +- .srcs[0].reg.idx_count = 2, + .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, + .src_count = 1, + }; ++ ++ if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ { ++ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; ++ instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; ++ instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ ++ instr.srcs[0].reg.idx_count = 3; ++ ++ instr.idx[0] = size; ++ instr.idx[1] = cbuffer->reg.space; ++ instr.idx_count = 2; ++ } ++ else ++ { ++ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; ++ instr.srcs[0].reg.idx[1].offset = size; ++ instr.srcs[0].reg.idx_count = 2; ++ } ++ + write_sm4_instruction(tpf, &instr); + } + +@@ -4163,7 +4276,6 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + .opcode = VKD3D_SM4_OP_DCL_SAMPLER, + + .dsts[0].reg.type = VKD3DSPR_SAMPLER, +- .dsts[0].reg.idx_count = 1, + .dst_count = 1, + }; + +@@ -4172,14 +4284,29 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << 
VKD3D_SM4_SAMPLER_MODE_SHIFT; + +- assert(resource->regset == HLSL_REGSET_SAMPLERS); ++ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); + + for (i = 0; i < resource->bind_count; ++i) + { + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) + continue; + +- instr.dsts[0].reg.idx[0].offset = resource->id + i; ++ if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ { ++ VKD3D_ASSERT(!i); ++ instr.dsts[0].reg.idx[0].offset = resource->id; ++ instr.dsts[0].reg.idx[1].offset = resource->index; ++ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ ++ instr.dsts[0].reg.idx_count = 3; ++ ++ instr.idx[0] = resource->space; ++ instr.idx_count = 1; ++ } ++ else ++ { ++ instr.dsts[0].reg.idx[0].offset = resource->index + i; ++ instr.dsts[0].reg.idx_count = 1; ++ } + write_sm4_instruction(tpf, &instr); + } + } +@@ -4192,7 +4319,7 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + struct sm4_instruction instr; + unsigned int i; + +- assert(resource->regset == regset); ++ VKD3D_ASSERT(resource->regset == regset); + + component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + +@@ -4212,6 +4339,23 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + .idx_count = 1, + }; + ++ if (hlsl_version_ge(tpf->ctx, 5, 1)) ++ { ++ VKD3D_ASSERT(!i); ++ instr.dsts[0].reg.idx[0].offset = resource->id; ++ instr.dsts[0].reg.idx[1].offset = resource->index; ++ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ ++ instr.dsts[0].reg.idx_count = 3; ++ ++ instr.idx[1] = resource->space; ++ instr.idx_count = 2; ++ } ++ else ++ { ++ instr.dsts[0].reg.idx[0].offset = resource->index + i; ++ instr.dsts[0].reg.idx_count = 1; ++ } ++ + if (uav) + { + switch (resource->data_type->sampler_dim) +@@ -4449,7 +4593,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + +- assert(dst_idx < ARRAY_SIZE(instr.dsts)); ++ VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; +@@ -4508,7 +4652,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t + memset(&instr, 0, sizeof(instr)); + instr.opcode = opcode; + +- assert(dst_idx < ARRAY_SIZE(instr.dsts)); ++ VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); + sm4_dst_from_node(&instr.dsts[dst_idx], dst); + instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; + instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; +@@ -4706,7 +4850,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl + const struct hlsl_ir_node *dst = &load->node; + struct sm4_instruction instr; + +- assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); ++ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; +@@ -4735,7 +4879,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir + return; + } + +- assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); ++ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + + 
memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_RESINFO; +@@ -4789,7 +4933,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex + const struct hlsl_type *src_type = arg1->data_type; + + /* Narrowing casts were already lowered. */ +- assert(src_type->dimx == dst_type->dimx); ++ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); + + switch (dst_type->e.numeric.type) + { +@@ -4904,6 +5048,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct + write_sm4_instruction(tpf, &instr); + } + ++static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) ++{ ++ struct sm4_instruction instr; ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; ++ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; ++ ++ sm4_dst_from_node(&instr.dsts[0], dst); ++ instr.dst_count = 1; ++ ++ instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; ++ instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); ++ instr.src_count = 1; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ + static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) + { + const struct hlsl_ir_node *arg1 = expr->operands[0].node; +@@ -4912,13 +5075,21 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + const struct hlsl_type *dst_type = expr->node.data_type; + struct vkd3d_string_buffer *dst_type_string; + +- assert(expr->node.reg.allocated); ++ VKD3D_ASSERT(expr->node.reg.allocated); + + if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) + return; + + switch (expr->op) + { ++ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: ++ if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) ++ write_sm4_rasterizer_sample_count(tpf, &expr->node); ++ else ++ hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, ++ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); ++ break; ++ + case HLSL_OP1_ABS: + switch (dst_type->e.numeric.type) + { +@@ -4932,7 +5103,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP1_BIT_NOT: +- assert(type_is_integer(dst_type)); ++ VKD3D_ASSERT(type_is_integer(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + +@@ -4941,67 +5112,73 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP1_CEIL: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_COS: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); + break; + + case HLSL_OP1_DSX: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_COARSE: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_FINE: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, 
&expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_COARSE: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_FINE: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_EXP2: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); + break; + ++ case HLSL_OP1_F16TOF32: ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); ++ break; ++ + case HLSL_OP1_FLOOR: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); + break; + + case HLSL_OP1_FRACT: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOG2: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); + break; + + case HLSL_OP1_LOGIC_NOT: +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); + break; + +@@ -5022,39 +5199,77 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + } + break; + ++ case HLSL_OP1_RCP: ++ switch (dst_type->e.numeric.type) ++ { ++ case HLSL_TYPE_FLOAT: ++ /* SM5 comes with a RCP opcode */ ++ if (tpf->ctx->profile->major_version >= 5) ++ { ++ write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); ++ } ++ else ++ { ++ /* For SM4, implement as DIV dst, 1.0, src */ ++ struct sm4_instruction instr; ++ struct hlsl_constant_value one; ++ ++ VKD3D_ASSERT(type_is_float(dst_type)); ++ ++ memset(&instr, 0, sizeof(instr)); ++ instr.opcode = VKD3D_SM4_OP_DIV; ++ ++ sm4_dst_from_node(&instr.dsts[0], &expr->node); ++ instr.dst_count = 1; ++ ++ for (unsigned int i = 0; i < 4; i++) ++ one.u[i].f = 1.0f; ++ sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); ++ sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); ++ instr.src_count = 2; ++ ++ write_sm4_instruction(tpf, &instr); ++ } ++ break; ++ ++ default: ++ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); ++ } ++ break; ++ + case HLSL_OP1_REINTERPRET: + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); + break; + + case HLSL_OP1_ROUND: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_RSQ: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); + break; + + case HLSL_OP1_SAT: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV + | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << 
VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), + &expr->node, arg1, 0); + break; + + case HLSL_OP1_SIN: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); + break; + + case HLSL_OP1_SQRT: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); + break; + + case HLSL_OP1_TRUNC: +- assert(type_is_float(dst_type)); ++ VKD3D_ASSERT(type_is_float(dst_type)); + write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); + break; + +@@ -5076,17 +5291,17 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + break; + + case HLSL_OP2_BIT_AND: +- assert(type_is_integer(dst_type)); ++ VKD3D_ASSERT(type_is_integer(dst_type)); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_OR: +- assert(type_is_integer(dst_type)); ++ VKD3D_ASSERT(type_is_integer(dst_type)); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: +- assert(type_is_integer(dst_type)); ++ VKD3D_ASSERT(type_is_integer(dst_type)); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); + break; + +@@ -5139,7 +5354,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { +@@ -5165,7 +5380,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { +@@ -5194,7 +5409,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { +@@ -5220,18 +5435,18 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + } + + case HLSL_OP2_LOGIC_AND: +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LOGIC_OR: +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); + break; + + case HLSL_OP2_LSHIFT: +- assert(type_is_integer(dst_type)); +- assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); + break; + +@@ -5310,7 +5525,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex + { + const struct hlsl_type *src_type = arg1->data_type; + +- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + + switch (src_type->e.numeric.type) + { +@@ -5333,8 +5548,8 @@ static void write_sm4_expr(const struct tpf_writer *tpf, 
const struct hlsl_ir_ex + } + + case HLSL_OP2_RSHIFT: +- assert(type_is_integer(dst_type)); +- assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); ++ VKD3D_ASSERT(type_is_integer(dst_type)); ++ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, + &expr->node, arg1, arg2); + break; +@@ -5358,7 +5573,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * + .src_count = 1, + }; + +- assert(iff->condition.node->data_type->dimx == 1); ++ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); + + sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); + write_sm4_instruction(tpf, &instr); +@@ -5436,7 +5651,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo + sm4_dst_from_node(&instr.dsts[0], &load->node); + instr.dst_count = 1; + +- assert(hlsl_is_numeric_type(type)); ++ VKD3D_ASSERT(hlsl_is_numeric_type(type)); + if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) + { + struct hlsl_constant_value value; +@@ -5553,7 +5768,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + case HLSL_RESOURCE_SAMPLE_GRAD: + /* Combined sample expressions were lowered. */ +- assert(load->sampler.var); ++ VKD3D_ASSERT(load->sampler.var); + write_sm4_sample(tpf, load); + break; + +@@ -5706,7 +5921,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc + + if (!instr->reg.allocated) + { +- assert(instr->type == HLSL_IR_CONSTANT); ++ VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); + continue; + } + } +@@ -5799,21 +6014,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, + LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (cbuffer->reg.allocated) +- { +- if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); +- + write_sm4_dcl_constant_buffer(&tpf, cbuffer); +- } + } + + for (i = 0; i < extern_resources_count; ++i) + { + const struct extern_resource *resource = &extern_resources[i]; + +- if (hlsl_version_ge(ctx, 5, 1)) +- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); +- + if (resource->regset == HLSL_REGSET_SAMPLERS) + write_sm4_dcl_samplers(&tpf, resource); + else if (resource->regset == HLSL_REGSET_TEXTURES) +@@ -5875,7 +6082,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { +- if (extern_resources[i].data_type->e.resource.rasteriser_ordered) ++ if (extern_resources[i].data_type && extern_resources[i].data_type->e.resource.rasteriser_ordered) + *flags |= VKD3D_SM4_REQUIRES_ROVS; + } + sm4_free_extern_resources(extern_resources, extern_resources_count); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 14a3fa778e5..3c1ffcdbee3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -23,6 +23,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -60,7 +62,7 @@ void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer) + buffer->buffer_size 
= 16; + buffer->content_size = 0; + buffer->buffer = vkd3d_malloc(buffer->buffer_size); +- assert(buffer->buffer); ++ VKD3D_ASSERT(buffer->buffer); + memset(buffer->buffer, 0, buffer->buffer_size); + } + +@@ -228,7 +230,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct + { + if (!buffer) + return; +- assert(cache->count + 1 <= cache->max_count); ++ VKD3D_ASSERT(cache->count + 1 <= cache->max_count); + cache->buffers[cache->count++] = buffer; + } + +@@ -429,7 +431,7 @@ static void bytecode_set_bytes(struct vkd3d_bytecode_buffer *buffer, size_t offs + if (buffer->status) + return; + +- assert(vkd3d_bound_range(offset, size, buffer->size)); ++ VKD3D_ASSERT(vkd3d_bound_range(offset, size, buffer->size)); + memcpy(buffer->data + offset, value, size); + } + +@@ -642,7 +644,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig + signature->element_count = src->element_count; + if (!src->elements) + { +- assert(!signature->element_count); ++ VKD3D_ASSERT(!signature->element_count); + signature->elements = NULL; + return true; + } +@@ -787,7 +789,7 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_push_cf_info(struct vkd3d_ + + static void vkd3d_shader_scan_pop_cf_info(struct vkd3d_shader_scan_context *context) + { +- assert(context->cf_info_count); ++ VKD3D_ASSERT(context->cf_info_count); + + --context->cf_info_count; + } +@@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex + + static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) +- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) +- || handler_idx == VKD3DSIH_LD_UAV_TYPED +- || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) +- || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ ++ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) ++ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) ++ || opcode == VKD3DSIH_LD_UAV_TYPED ++ || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) ++ || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); + } + + static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, +@@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * + + static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC +- || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ ++ return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; + } + + static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, +@@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex + + static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) + { +- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; +- return 
(VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) +- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); ++ enum vkd3d_shader_opcode opcode = instruction->opcode; ++ ++ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) ++ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); + } + + static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, +@@ -1130,7 +1134,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte + + context->location = instruction->location; + +- switch (instruction->handler_idx) ++ switch (instruction->opcode) + { + case VKD3DSIH_DCL_CONSTANT_BUFFER: + vkd3d_shader_scan_constant_buffer_declaration(context, instruction); +@@ -2063,7 +2067,7 @@ bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *ins + bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, + unsigned int idx, unsigned int count) + { +- assert(idx <= instructions->count); ++ VKD3D_ASSERT(idx <= instructions->count); + + if (!shader_instruction_array_reserve(instructions, instructions->count + count)) + return false; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 29b8d6ad022..13b4dab76d1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -151,6 +151,8 @@ enum vkd3d_shader_error + VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, + VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, ++ VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, ++ VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, + + VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, + VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, +@@ -455,6 +457,10 @@ enum vkd3d_shader_opcode + VKD3DSIH_PHASE, + VKD3DSIH_PHI, + VKD3DSIH_POW, ++ VKD3DSIH_QUAD_READ_ACROSS_D, ++ VKD3DSIH_QUAD_READ_ACROSS_X, ++ VKD3DSIH_QUAD_READ_ACROSS_Y, ++ VKD3DSIH_QUAD_READ_LANE_AT, + VKD3DSIH_RCP, + VKD3DSIH_REP, + VKD3DSIH_RESINFO, +@@ -613,6 +619,7 @@ enum vkd3d_shader_register_type + VKD3DSPR_SSA, + VKD3DSPR_WAVELANECOUNT, + VKD3DSPR_WAVELANEINDEX, ++ VKD3DSPR_PARAMETER, + + VKD3DSPR_COUNT, + +@@ -805,6 +812,7 @@ enum vkd3d_tessellator_domain + + #define VKD3DSI_NONE 0x0 + #define VKD3DSI_TEXLD_PROJECT 0x1 ++#define VKD3DSI_TEXLD_BIAS 0x2 + #define VKD3DSI_INDEXED_DYNAMIC 0x4 + #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 + #define VKD3DSI_RESINFO_UINT 0x2 +@@ -1189,7 +1197,7 @@ struct vkd3d_shader_location + struct vkd3d_shader_instruction + { + struct vkd3d_shader_location location; +- enum vkd3d_shader_opcode handler_idx; ++ enum vkd3d_shader_opcode opcode; + uint32_t flags; + unsigned int dst_count; + unsigned int src_count; +@@ -1238,8 +1246,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns + return v->major < major || (v->major == major && v->minor <= minor); + } + +-void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, +- enum vkd3d_shader_opcode handler_idx); ++void vsir_instruction_init(struct vkd3d_shader_instruction *ins, ++ const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); + + static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) + { +@@ -1303,14 
+1311,14 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, + static inline struct vkd3d_shader_src_param *shader_src_param_allocator_get( + struct vkd3d_shader_param_allocator *allocator, unsigned int count) + { +- assert(allocator->stride == sizeof(struct vkd3d_shader_src_param)); ++ VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_src_param)); + return shader_param_allocator_get(allocator, count); + } + + static inline struct vkd3d_shader_dst_param *shader_dst_param_allocator_get( + struct vkd3d_shader_param_allocator *allocator, unsigned int count) + { +- assert(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); ++ VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); + return shader_param_allocator_get(allocator, count); + } + +@@ -1355,6 +1363,10 @@ struct vsir_program + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; + ++ unsigned int parameter_count; ++ const struct vkd3d_shader_parameter1 *parameters; ++ bool free_parameters; ++ + unsigned int input_control_point_count, output_control_point_count; + unsigned int flat_constant_count[3]; + unsigned int block_count; +@@ -1370,7 +1382,10 @@ void vsir_program_cleanup(struct vsir_program *program); + int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); +-bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); ++const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( ++ const struct vsir_program *program, enum vkd3d_shader_parameter_name name); ++bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, ++ const struct vkd3d_shader_version *version, unsigned int reserve); + enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, +@@ -1663,7 +1678,7 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask + { + unsigned int i; + +- assert(write_mask); ++ VKD3D_ASSERT(write_mask); + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) +@@ -1677,13 +1692,13 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask + static inline unsigned int vsir_write_mask_component_count(uint32_t write_mask) + { + unsigned int count = vkd3d_popcount(write_mask & VKD3DSP_WRITEMASK_ALL); +- assert(1 <= count && count <= VKD3D_VEC4_SIZE); ++ VKD3D_ASSERT(1 <= count && count <= VKD3D_VEC4_SIZE); + return count; + } + + static inline unsigned int vkd3d_write_mask_from_component_count(unsigned int component_count) + { +- assert(component_count <= VKD3D_VEC4_SIZE); ++ VKD3D_ASSERT(component_count <= VKD3D_VEC4_SIZE); + return (VKD3DSP_WRITEMASK_0 << component_count) - 1; + } + +diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c +index a0a29ed30cb..11d87ac1d98 100644 +--- a/libs/vkd3d/libs/vkd3d/cache.c ++++ b/libs/vkd3d/libs/vkd3d/cache.c +@@ -69,7 +69,14 @@ static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry + static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache 
*cache, + struct shader_cache_entry *e) + { +- rb_put(&cache->tree, &e->h.hash, &e->entry); ++ const struct shader_cache_key k = ++ { ++ .hash = e->h.hash, ++ .key_size = e->h.key_size, ++ .key = e->payload ++ }; ++ ++ rb_put(&cache->tree, &k, &e->entry); + } + + int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) +diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c +index 95366d3441b..dcc7690876f 100644 +--- a/libs/vkd3d/libs/vkd3d/command.c ++++ b/libs/vkd3d/libs/vkd3d/command.c +@@ -93,7 +93,7 @@ VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue) + + vkd3d_mutex_lock(&queue->mutex); + +- assert(queue->vk_queue); ++ VKD3D_ASSERT(queue->vk_queue); + return queue->vk_queue; + } + +@@ -423,7 +423,7 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, + static const struct d3d12_root_parameter *root_signature_get_parameter( + const struct d3d12_root_signature *root_signature, unsigned int index) + { +- assert(index < root_signature->parameter_count); ++ VKD3D_ASSERT(index < root_signature->parameter_count); + return &root_signature->parameters[index]; + } + +@@ -431,7 +431,7 @@ static const struct d3d12_root_descriptor_table *root_signature_get_descriptor_t + const struct d3d12_root_signature *root_signature, unsigned int index) + { + const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); +- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); ++ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); + return &p->u.descriptor_table; + } + +@@ -439,7 +439,7 @@ static const struct d3d12_root_constant *root_signature_get_32bit_constants( + const struct d3d12_root_signature *root_signature, unsigned int index) + { + const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); +- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); ++ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); + return &p->u.constant; + } + +@@ -447,7 +447,7 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( + const struct d3d12_root_signature *root_signature, unsigned int index) + { + const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); +- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV ++ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV + || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV + || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV); + return p; +@@ -528,7 +528,7 @@ static void d3d12_fence_garbage_collect_vk_semaphores_locked(struct d3d12_fence + + if (current->u.binary.vk_fence) + WARN("Destroying potentially pending semaphore.\n"); +- assert(!current->u.binary.is_acquired); ++ VKD3D_ASSERT(!current->u.binary.is_acquired); + + VK_CALL(vkDestroySemaphore(device->vk_device, current->u.binary.vk_semaphore, NULL)); + fence->semaphores[i] = fence->semaphores[--fence->semaphore_count]; +@@ -599,7 +599,7 @@ static void d3d12_fence_remove_vk_semaphore(struct d3d12_fence *fence, struct vk + { + vkd3d_mutex_lock(&fence->mutex); + +- assert(semaphore->u.binary.is_acquired); ++ VKD3D_ASSERT(semaphore->u.binary.is_acquired); + + *semaphore = fence->semaphores[--fence->semaphore_count]; + +@@ -610,7 +610,7 @@ static void d3d12_fence_release_vk_semaphore(struct d3d12_fence *fence, struct v + { + vkd3d_mutex_lock(&fence->mutex); + +- assert(semaphore->u.binary.is_acquired); ++ 
VKD3D_ASSERT(semaphore->u.binary.is_acquired); + semaphore->u.binary.is_acquired = false; + + vkd3d_mutex_unlock(&fence->mutex); +@@ -1154,7 +1154,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) + + if (!(iface1 = (ID3D12Fence1 *)iface)) + return NULL; +- assert(iface1->lpVtbl == &d3d12_fence_vtbl); ++ VKD3D_ASSERT(iface1->lpVtbl == &d3d12_fence_vtbl); + return impl_from_ID3D12Fence1(iface1); + } + +@@ -1792,7 +1792,7 @@ static struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(I + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_command_allocator_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_allocator_vtbl); + return impl_from_ID3D12CommandAllocator(iface); + } + +@@ -1942,9 +1942,9 @@ static void d3d12_command_signature_decref(struct d3d12_command_signature *signa + } + + /* ID3D12CommandList */ +-static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList5(ID3D12GraphicsCommandList5 *iface) ++static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList6(ID3D12GraphicsCommandList6 *iface) + { +- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); ++ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); + } + + static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) +@@ -2025,7 +2025,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l + + static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, + const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, +- VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) ++ VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout, ++ struct d3d12_device *device) + { + bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); + VkPipelineStageFlags queue_shader_stages = 0; +@@ -2033,10 +2034,12 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) + { + queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT +- | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT +- | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT +- | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT + | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; ++ if (device->vk_info.geometry_shaders) ++ queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; ++ if (device->vk_info.tessellation_shaders) ++ queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT ++ | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; + } + if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) + queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; +@@ -2054,7 +2057,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + { + if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) + return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, +- resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); ++ resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); + + *access_mask = VK_ACCESS_MEMORY_READ_BIT; + *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; +@@ -2165,7 +2168,7 @@ static bool 
vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, + } + + /* Handle read-only states. */ +- assert(!is_write_resource_state(state)); ++ VKD3D_ASSERT(!is_write_resource_state(state)); + + if (state & D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER) + { +@@ -2239,7 +2242,7 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 + VkPipelineStageFlags src_stage_mask, dst_stage_mask; + VkImageMemoryBarrier barrier; + +- assert(d3d12_resource_is_texture(resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); + + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; +@@ -2251,7 +2254,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 + VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; + + if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, +- resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) ++ resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, ++ &dst_stage_mask, &barrier.newLayout, list->device)) + { + FIXME("Unhandled state %#x.\n", resource->initial_state); + return; +@@ -2285,12 +2289,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l + } + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList5 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList6 *iface, + REFIID iid, void **object) + { + TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); + +- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) ++ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6) ++ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) +@@ -2301,7 +2306,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic + || IsEqualGUID(iid, &IID_ID3D12Object) + || IsEqualGUID(iid, &IID_IUnknown)) + { +- ID3D12GraphicsCommandList5_AddRef(iface); ++ ID3D12GraphicsCommandList6_AddRef(iface); + *object = iface; + return S_OK; + } +@@ -2312,9 +2317,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic + return E_NOINTERFACE; + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList5 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList6 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + unsigned int refcount = vkd3d_atomic_increment_u32(&list->refcount); + + TRACE("%p increasing refcount to %u.\n", list, refcount); +@@ -2327,9 +2332,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind + vkd3d_free(bindings->vk_uav_counter_views); + } + +-static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList5 *iface) ++static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList6 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + unsigned int refcount = vkd3d_atomic_decrement_u32(&list->refcount); + + TRACE("%p decreasing refcount to %u.\n", list, 
refcount); +@@ -2355,66 +2360,67 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL + return refcount; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList5 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList6 *iface, + REFGUID guid, UINT *data_size, void *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_get_private_data(&list->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList5 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList6 *iface, + REFGUID guid, UINT data_size, const void *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); + + return vkd3d_set_private_data(&list->private_store, guid, data_size, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList5 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList6 *iface, + REFGUID guid, const IUnknown *data) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); + + return vkd3d_set_private_data_interface(&list->private_store, guid, data); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList5 *iface, const WCHAR *name) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList6 *iface, const WCHAR *name) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); + + return name ? 
S_OK : E_INVALIDARG; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList5 *iface, REFIID iid, void **device) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList6 *iface, ++ REFIID iid, void **device) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); + + return d3d12_device_query_interface(list->device, iid, device); + } + +-static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList5 *iface) ++static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList6 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p.\n", iface); + + return list->type; + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList5 *iface) ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList6 *iface) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + VkResult vr; + +@@ -2458,7 +2464,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL + static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + ID3D12PipelineState *initial_pipeline_state) + { +- ID3D12GraphicsCommandList5 *iface = &list->ID3D12GraphicsCommandList5_iface; ++ ID3D12GraphicsCommandList6 *iface = &list->ID3D12GraphicsCommandList6_iface; + + memset(list->strides, 0, sizeof(list->strides)); + list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; +@@ -2494,14 +2500,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, + + list->descriptor_heap_count = 0; + +- ID3D12GraphicsCommandList5_SetPipelineState(iface, initial_pipeline_state); ++ ID3D12GraphicsCommandList6_SetPipelineState(iface, initial_pipeline_state); + } + +-static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList5 *iface, ++static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList6 *iface, + ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) + { + struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + HRESULT hr; + + TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", +@@ -2528,7 +2534,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL + return hr; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList6 *iface, + ID3D12PipelineState *pipeline_state) + { + FIXME("iface %p, pipeline_state %p stub!\n", iface, pipeline_state); +@@ -2538,7 +2544,7 @@ static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list + { + struct d3d12_graphics_pipeline_state *graphics; + +- 
assert(d3d12_pipeline_state_is_graphics(list->state)); ++ VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(list->state)); + graphics = &list->state->u.graphics; + + return graphics->dsv_format || (d3d12_pipeline_state_has_unknown_dsv_format(list->state) && list->dsv_format); +@@ -2973,30 +2979,20 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list + enum vkd3d_pipeline_bind_point bind_point) + { + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; ++ VkWriteDescriptorSet descriptor_writes[ARRAY_SIZE(bindings->push_descriptors)] = {0}; ++ VkDescriptorBufferInfo buffer_infos[ARRAY_SIZE(bindings->push_descriptors)] = {0}; + const struct d3d12_root_signature *root_signature = bindings->root_signature; +- VkWriteDescriptorSet *descriptor_writes = NULL, *current_descriptor_write; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; +- VkDescriptorBufferInfo *buffer_infos = NULL, *current_buffer_info; + const struct d3d12_root_parameter *root_parameter; + struct vkd3d_push_descriptor *push_descriptor; + struct d3d12_device *device = list->device; + VkDescriptorBufferInfo *vk_buffer_info; +- unsigned int i, descriptor_count; ++ unsigned int i, descriptor_count = 0; + VkBufferView *vk_buffer_view; + + if (!bindings->push_descriptor_dirty_mask) + return; + +- descriptor_count = vkd3d_popcount(bindings->push_descriptor_dirty_mask); +- +- if (!(descriptor_writes = vkd3d_calloc(descriptor_count, sizeof(*descriptor_writes)))) +- return; +- if (!(buffer_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_infos)))) +- goto done; +- +- descriptor_count = 0; +- current_buffer_info = buffer_infos; +- current_descriptor_write = descriptor_writes; + for (i = 0; i < ARRAY_SIZE(bindings->push_descriptors); ++i) + { + if (!(bindings->push_descriptor_dirty_mask & (1u << i))) +@@ -3008,7 +3004,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list + if (root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) + { + vk_buffer_view = NULL; +- vk_buffer_info = current_buffer_info; ++ vk_buffer_info = &buffer_infos[descriptor_count]; + vk_buffer_info->buffer = push_descriptor->u.cbv.vk_buffer; + vk_buffer_info->offset = push_descriptor->u.cbv.offset; + vk_buffer_info->range = VK_WHOLE_SIZE; +@@ -3019,21 +3015,15 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list + vk_buffer_info = NULL; + } + +- if (!vk_write_descriptor_set_from_root_descriptor(current_descriptor_write, ++ if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], + root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) + continue; + + ++descriptor_count; +- ++current_descriptor_write; +- ++current_buffer_info; + } + + VK_CALL(vkUpdateDescriptorSets(device->vk_device, descriptor_count, descriptor_writes, 0, NULL)); + bindings->push_descriptor_dirty_mask = 0; +- +-done: +- vkd3d_free(descriptor_writes); +- vkd3d_free(buffer_infos); + } + + static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_command_list *list, +@@ -3063,7 +3053,7 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma + const struct vkd3d_shader_uav_counter_binding *uav_counter = &state->uav_counters.bindings[i]; + const VkBufferView *vk_uav_counter_views = bindings->vk_uav_counter_views; + +- assert(vk_uav_counter_views[i]); ++ VKD3D_ASSERT(vk_uav_counter_views[i]); + + vk_descriptor_writes[i].sType = 
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vk_descriptor_writes[i].pNext = NULL; +@@ -3336,7 +3326,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list + return true; + + vk_render_pass = list->pso_render_pass; +- assert(vk_render_pass); ++ VKD3D_ASSERT(vk_render_pass); + + begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + begin_desc.pNext = NULL; +@@ -3392,11 +3382,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList6 *iface, + UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, + UINT start_instance_location) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " +@@ -3416,11 +3406,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom + instance_count, start_vertex_location, start_instance_location)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList6 *iface, + UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, + INT base_vertex_location, UINT start_instance_location) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " +@@ -3442,10 +3432,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap + instance_count, start_vertex_location, base_vertex_location, start_instance_location)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList6 *iface, + UINT x, UINT y, UINT z) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); +@@ -3461,10 +3451,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL + VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; + VkBufferCopy buffer_copy; +@@ -3476,9 +3466,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics + vk_procs = 
&list->device->vk_procs; + + dst_resource = unsafe_impl_from_ID3D12Resource(dst); +- assert(d3d12_resource_is_buffer(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); + src_resource = unsafe_impl_from_ID3D12Resource(src); +- assert(d3d12_resource_is_buffer(src_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); + + d3d12_command_list_track_resource_usage(list, dst_resource); + d3d12_command_list_track_resource_usage(list, src_resource); +@@ -3679,11 +3669,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + src_format->dxgi_format, src_format->vk_format, + dst_format->dxgi_format, dst_format->vk_format); + +- assert(d3d12_resource_is_texture(dst_resource)); +- assert(d3d12_resource_is_texture(src_resource)); +- assert(!vkd3d_format_is_compressed(dst_format)); +- assert(!vkd3d_format_is_compressed(src_format)); +- assert(dst_format->byte_count == src_format->byte_count); ++ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); ++ VKD3D_ASSERT(!vkd3d_format_is_compressed(dst_format)); ++ VKD3D_ASSERT(!vkd3d_format_is_compressed(src_format)); ++ VKD3D_ASSERT(dst_format->byte_count == src_format->byte_count); + + buffer_image_copy.bufferOffset = 0; + buffer_image_copy.bufferRowLength = 0; +@@ -3727,11 +3717,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com + buffer_image_copy.imageSubresource.layerCount = layer_count; + dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; + +- assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == ++ VKD3D_ASSERT(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == + d3d12_resource_desc_get_width(dst_desc, dst_miplevel_idx)); +- assert(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == ++ VKD3D_ASSERT(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == + d3d12_resource_desc_get_height(dst_desc, dst_miplevel_idx)); +- assert(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == ++ VKD3D_ASSERT(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == + d3d12_resource_desc_get_depth(dst_desc, dst_miplevel_idx)); + + VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer, +@@ -3746,11 +3736,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) + && box->back > box->front; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList6 *iface, + const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, + const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *src_format, *dst_format; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3779,8 +3769,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX + && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT) + { +- assert(d3d12_resource_is_buffer(dst_resource)); +- assert(d3d12_resource_is_texture(src_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); + + if (!(dst_format = 
vkd3d_format_from_d3d12_resource_desc(list->device, + &src_resource->desc, dst->u.PlacedFootprint.Footprint.Format))) +@@ -3808,8 +3798,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + else if (src->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT + && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) + { +- assert(d3d12_resource_is_texture(dst_resource)); +- assert(d3d12_resource_is_buffer(src_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); + + if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device, + &dst_resource->desc, src->u.PlacedFootprint.Footprint.Format))) +@@ -3837,8 +3827,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX + && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) + { +- assert(d3d12_resource_is_texture(dst_resource)); +- assert(d3d12_resource_is_texture(src_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); + + dst_format = dst_resource->format; + src_format = src_resource->format; +@@ -3871,10 +3861,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *dst, ID3D12Resource *src) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *dst_format, *src_format; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3897,8 +3887,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + + if (d3d12_resource_is_buffer(dst_resource)) + { +- assert(d3d12_resource_is_buffer(src_resource)); +- assert(src_resource->desc.Width == dst_resource->desc.Width); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); ++ VKD3D_ASSERT(src_resource->desc.Width == dst_resource->desc.Width); + + vk_buffer_copy.srcOffset = 0; + vk_buffer_copy.dstOffset = 0; +@@ -3912,10 +3902,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + dst_format = dst_resource->format; + src_format = src_resource->format; + +- assert(d3d12_resource_is_texture(dst_resource)); +- assert(d3d12_resource_is_texture(src_resource)); +- assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); +- assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); ++ VKD3D_ASSERT(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); ++ VKD3D_ASSERT(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + + if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) + { +@@ -3941,7 +3931,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *tiled_resource, const 
D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, + const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, + D3D12_TILE_COPY_FLAGS flags) +@@ -3952,11 +3942,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand + buffer, buffer_offset, flags); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *dst, UINT dst_sub_resource_idx, + ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_format *src_format, *dst_format, *vk_format; + struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -3972,8 +3962,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi + dst_resource = unsafe_impl_from_ID3D12Resource(dst); + src_resource = unsafe_impl_from_ID3D12Resource(src); + +- assert(d3d12_resource_is_texture(dst_resource)); +- assert(d3d12_resource_is_texture(src_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); + + d3d12_command_list_track_resource_usage(list, dst_resource); + d3d12_command_list_track_resource_usage(list, src_resource); +@@ -4019,10 +4009,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList6 *iface, + D3D12_PRIMITIVE_TOPOLOGY topology) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, topology %#x.\n", iface, topology); + +@@ -4033,11 +4023,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr + d3d12_command_list_invalidate_current_pipeline(list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList6 *iface, + UINT viewport_count, const D3D12_VIEWPORT *viewports) + { + VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; + +@@ -4071,10 +4061,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo + VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList6 *iface, + UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList6(iface); + VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; +@@ -4099,10 +4089,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic + VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList6 *iface, + const FLOAT blend_factor[4]) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); +@@ -4111,10 +4101,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics + VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList6 *iface, + UINT stencil_ref) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + + TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); +@@ -4123,11 +4113,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC + VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList6 *iface, + ID3D12PipelineState *pipeline_state) + { + struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); + +@@ -4178,10 +4168,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA + return 0; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList6 *iface, + UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + bool have_aliasing_barriers = false, have_split_barriers = false; + const struct vkd3d_vk_device_procs *vk_procs; + const struct vkd3d_vulkan_info *vk_info; +@@ -4277,13 +4267,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + } + + if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, +- resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) ++ resource, list->vk_queue_flags, vk_info, &src_access_mask, ++ &src_stage_mask, &layout_before, list->device)) + { + FIXME("Unhandled state %#x.\n", 
state_before); + continue; + } + if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, +- resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) ++ resource, list->vk_queue_flags, vk_info, &dst_access_mask, ++ &dst_stage_mask, &layout_after, list->device)) + { + FIXME("Unhandled state %#x.\n", state_after); + continue; +@@ -4303,7 +4295,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + + resource = unsafe_impl_from_ID3D12Resource(uav->pResource); + vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, +- resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); ++ resource, list->vk_queue_flags, vk_info, &access_mask, ++ &stage_mask, &image_layout, list->device); + src_access_mask = dst_access_mask = access_mask; + src_stage_mask = dst_stage_mask = stage_mask; + layout_before = layout_after = image_layout; +@@ -4404,13 +4397,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC + WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList6 *iface, + ID3D12GraphicsCommandList *command_list) + { + FIXME("iface %p, command_list %p stub!\n", iface, command_list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList6 *iface, + UINT heap_count, ID3D12DescriptorHeap *const *heaps) + { + TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); +@@ -4436,10 +4429,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis + d3d12_command_list_invalidate_root_parameters(list, bind_point); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList6 *iface, + ID3D12RootSignature *root_signature) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + +@@ -4447,10 +4440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G + unsafe_impl_from_ID3D12RootSignature(root_signature)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList6 *iface, + ID3D12RootSignature *root_signature) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_signature %p.\n", iface, root_signature); + +@@ -4466,9 +4459,9 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l + struct d3d12_descriptor_heap *descriptor_heap; + struct d3d12_desc *desc; + +- assert(root_signature_get_descriptor_table(root_signature, index)); ++ VKD3D_ASSERT(root_signature_get_descriptor_table(root_signature, index)); + +- assert(index < 
ARRAY_SIZE(bindings->descriptor_tables)); ++ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->descriptor_tables)); + desc = d3d12_desc_from_gpu_handle(base_descriptor); + + if (bindings->descriptor_tables[index] == desc) +@@ -4489,10 +4482,10 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l + bindings->descriptor_table_active_mask |= (uint64_t)1 << index; + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", + iface, root_parameter_index, debug_gpu_handle(base_descriptor)); +@@ -4501,10 +4494,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I + root_parameter_index, base_descriptor); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, + UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", + iface, root_parameter_index, debug_gpu_handle(base_descriptor)); +@@ -4526,10 +4519,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis + c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); +@@ -4538,10 +4531,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 + root_parameter_index, dst_offset, 1, &data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, + UINT root_parameter_index, UINT data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", + iface, root_parameter_index, data, dst_offset); +@@ -4550,10 +4543,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID + root_parameter_index, dst_offset, 1, &data); + } + +-static void STDMETHODCALLTYPE 
d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); +@@ -4562,10 +4555,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID + root_parameter_index, dst_offset, constant_count, data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, + UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", + iface, root_parameter_index, constant_count, data, dst_offset); +@@ -4587,7 +4580,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + struct d3d12_resource *resource; + + root_parameter = root_signature_get_root_descriptor(root_signature, index); +- assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); ++ VKD3D_ASSERT(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); + + if (gpu_address) + { +@@ -4618,7 +4611,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); + +- assert(index < ARRAY_SIZE(bindings->push_descriptors)); ++ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); + bindings->push_descriptors[index].u.cbv.vk_buffer = buffer_info.buffer; + bindings->push_descriptors[index].u.cbv.offset = buffer_info.offset; + bindings->push_descriptor_dirty_mask |= 1u << index; +@@ -4627,9 +4620,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( +- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4638,9 +4631,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( +- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4661,7 +4654,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + VkBufferView vk_buffer_view; + + root_parameter = root_signature_get_root_descriptor(root_signature, index); +- assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); ++ VKD3D_ASSERT(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); + + /* FIXME: Re-use buffer views. */ + if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) +@@ -4691,7 +4684,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); + +- assert(index < ARRAY_SIZE(bindings->push_descriptors)); ++ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); + bindings->push_descriptors[index].u.vk_buffer_view = vk_buffer_view; + bindings->push_descriptor_dirty_mask |= 1u << index; + bindings->push_descriptor_active_mask |= 1u << index; +@@ -4699,9 +4692,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( +- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4711,9 +4704,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( +- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4723,9 +4716,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi + } + + static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( +- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4735,9 +4728,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi + } + + static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRootUnorderedAccessView( +- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) ++ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + + TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", + iface, root_parameter_index, address); +@@ -4746,10 +4739,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV + root_parameter_index, address); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList6 *iface, + const D3D12_INDEX_BUFFER_VIEW *view) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_vk_device_procs *vk_procs; + struct d3d12_resource *resource; + enum VkIndexType index_type; +@@ -4789,10 +4782,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics + view->BufferLocation - resource->gpu_address, index_type)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList6 *iface, + UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct vkd3d_null_resources *null_resources; + struct vkd3d_gpu_va_allocator *gpu_va_allocator; + VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; +@@ -4814,15 +4807,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + return; + } + +- if (!views) +- { +- WARN("NULL \"views\" pointer specified.\n"); +- return; +- } +- + for (i = 0; i < view_count; ++i) + { +- if (views[i].BufferLocation) ++ if (views && views[i].BufferLocation) + { + resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); + buffers[i] = resource->u.vk_buffer; +@@ -4847,10 +4834,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi + d3d12_command_list_invalidate_current_pipeline(list); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList6 *iface, + UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; + VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; + VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; +@@ -4912,11 +4899,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm + VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); + } + +-static void STDMETHODCALLTYPE 
d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList6 *iface, + UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, + BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct d3d12_rtv_desc *rtv_desc; + const struct d3d12_dsv_desc *dsv_desc; + VkFormat prev_dsv_format; +@@ -5117,12 +5104,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList6 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, + UINT rect_count, const D3D12_RECT *rects) + { + const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference ds_reference; +@@ -5166,10 +5153,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra + &clear_value, rect_count, rects); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList6 *iface, + D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); + struct VkAttachmentDescription attachment_desc; + struct VkAttachmentReference color_reference; +@@ -5288,11 +5275,13 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, + struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, + unsigned int rect_count, const D3D12_RECT *rects) + { ++ const VkPhysicalDeviceLimits *device_limits = &list->device->vk_info.device_limits; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + unsigned int i, miplevel_idx, layer_count; + struct vkd3d_uav_clear_pipeline pipeline; + struct vkd3d_uav_clear_args clear_args; + const struct vkd3d_resource_view *view; ++ uint32_t count_x, count_y, count_z; + VkDescriptorImageInfo image_info; + D3D12_RECT full_rect, curr_rect; + VkWriteDescriptorSet write_set; +@@ -5383,18 +5372,32 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, + if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) + continue; + +- clear_args.offset.x = curr_rect.left; + clear_args.offset.y = curr_rect.top; +- clear_args.extent.width = curr_rect.right - curr_rect.left; + clear_args.extent.height = curr_rect.bottom - curr_rect.top; + +- VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, +- 
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); ++ count_y = vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height); ++ count_z = vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth); ++ if (count_y > device_limits->maxComputeWorkGroupCount[1]) ++ FIXME("Group Y count %u exceeds max %u.\n", count_y, device_limits->maxComputeWorkGroupCount[1]); ++ if (count_z > device_limits->maxComputeWorkGroupCount[2]) ++ FIXME("Group Z count %u exceeds max %u.\n", count_z, device_limits->maxComputeWorkGroupCount[2]); ++ ++ do ++ { ++ clear_args.offset.x = curr_rect.left; ++ clear_args.extent.width = curr_rect.right - curr_rect.left; ++ ++ count_x = vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width); ++ count_x = min(count_x, device_limits->maxComputeWorkGroupCount[0]); + +- VK_CALL(vkCmdDispatch(list->vk_command_buffer, +- vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width), +- vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height), +- vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth))); ++ VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, ++ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); ++ ++ VK_CALL(vkCmdDispatch(list->vk_command_buffer, count_x, count_y, count_z)); ++ ++ curr_rect.left += count_x * pipeline.group_size.width; ++ } ++ while (curr_rect.right > curr_rect.left); + } + } + +@@ -5434,15 +5437,59 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, ++static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, ++ struct d3d12_resource *resource, VkClearColorValue *colour) ++{ ++ struct vkd3d_texture_view_desc view_desc; ++ const struct vkd3d_format *uint_format; ++ struct vkd3d_view *uint_view; ++ ++ if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) ++ && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) ++ { ++ ERR("Unhandled format %#x.\n", view->format->dxgi_format); ++ return NULL; ++ } ++ ++ if (d3d12_resource_is_buffer(resource)) ++ { ++ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, ++ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) ++ { ++ ERR("Failed to create buffer view.\n"); ++ return NULL; ++ } ++ ++ return uint_view; ++ } ++ ++ memset(&view_desc, 0, sizeof(view_desc)); ++ view_desc.view_type = view->info.texture.vk_view_type; ++ view_desc.format = uint_format; ++ view_desc.miplevel_idx = view->info.texture.miplevel_idx; ++ view_desc.miplevel_count = 1; ++ view_desc.layer_idx = view->info.texture.layer_idx; ++ view_desc.layer_count = view->info.texture.layer_count; ++ view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; ++ view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; ++ ++ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, ++ resource->u.vk_image, &view_desc, &uint_view)) ++ { ++ ERR("Failed to create image view.\n"); ++ return NULL; ++ } ++ ++ return uint_view; ++} ++ ++static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList6 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + 
const UINT values[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; + struct d3d12_device *device = list->device; +- struct vkd3d_texture_view_desc view_desc; +- const struct vkd3d_format *uint_format; + const struct vkd3d_resource_view *view; + struct d3d12_resource *resource_impl; + VkClearColorValue colour; +@@ -5456,44 +5503,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + view = &descriptor->v; + memcpy(colour.uint32, values, sizeof(colour.uint32)); + +- if (view->format->type != VKD3D_FORMAT_TYPE_UINT) ++ if (view->format->type != VKD3D_FORMAT_TYPE_UINT ++ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) + { +- if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) +- && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) +- { +- ERR("Unhandled format %#x.\n", view->format->dxgi_format); +- return; +- } +- +- if (d3d12_resource_is_buffer(resource_impl)) +- { +- if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, +- uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) +- { +- ERR("Failed to create buffer view.\n"); +- return; +- } +- } +- else +- { +- memset(&view_desc, 0, sizeof(view_desc)); +- view_desc.view_type = view->info.texture.vk_view_type; +- view_desc.format = uint_format; +- view_desc.miplevel_idx = view->info.texture.miplevel_idx; +- view_desc.miplevel_count = 1; +- view_desc.layer_idx = view->info.texture.layer_idx; +- view_desc.layer_count = view->info.texture.layer_count; +- view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; +- view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; +- +- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, +- &uint_view)) +- { +- ERR("Failed to create image view.\n"); +- return; +- } +- } +- descriptor = uint_view; ++ ERR("Failed to create UINT view.\n"); ++ return; + } + + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); +@@ -5502,36 +5516,49 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID + vkd3d_view_decref(uint_view, device); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList6 *iface, + D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, + const float values[4], UINT rect_count, const D3D12_RECT *rects) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); ++ struct vkd3d_view *descriptor, *uint_view = NULL; ++ struct d3d12_device *device = list->device; ++ const struct vkd3d_resource_view *view; + struct d3d12_resource *resource_impl; + VkClearColorValue colour; +- struct vkd3d_view *view; + + TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", + iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); + + resource_impl = 
unsafe_impl_from_ID3D12Resource(resource); +- if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) ++ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + return; ++ view = &descriptor->v; + memcpy(colour.float32, values, sizeof(colour.float32)); + +- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); ++ if (view->format->type == VKD3D_FORMAT_TYPE_SINT ++ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) ++ { ++ ERR("Failed to create UINT view.\n"); ++ return; ++ } ++ ++ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); ++ ++ if (uint_view) ++ vkd3d_view_decref(uint_view, device); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) + { + FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList6 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + VkQueryControlFlags flags = 0; +@@ -5558,10 +5585,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman + VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList6 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); + const struct vkd3d_vk_device_procs *vk_procs; + +@@ -5603,12 +5630,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) + return sizeof(uint64_t); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList6 *iface, + ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, + ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) + { + const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i, first, count; +@@ -5684,10 +5711,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList5 *iface, ++static void 
STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); + const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; + const struct vkd3d_vk_device_procs *vk_procs; +@@ -5756,19 +5783,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList6 *iface, + UINT metadata, const void *data, UINT size) + { + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList6 *iface, + UINT metadata, const void *data, UINT size) + { + FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList5 *iface) ++static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList6 *iface) + { + FIXME("iface %p stub!\n", iface); + } +@@ -5777,14 +5804,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN + STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); + STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); + +-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList6 *iface, + ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, + UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) + { + struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); + struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); + struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; + const struct vkd3d_vk_device_procs *vk_procs; + unsigned int i; +@@ -5883,7 +5910,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC + d3d12_command_signature_decref(sig_impl); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, +@@ -5896,7 +5923,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); + } + +-static void 
STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *dst_buffer, UINT64 dst_offset, + ID3D12Resource *src_buffer, UINT64 src_offset, + UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, +@@ -5909,20 +5936,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr + dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, + FLOAT min, FLOAT max) + { + FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, + UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) + { + FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", + iface, sample_count, pixel_count, sample_positions); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, + ID3D12Resource *src_resource, UINT src_sub_resource_idx, + D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) +@@ -5934,16 +5961,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 + src_resource, src_sub_resource_idx, src_rect, format, mode); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList5 *iface, UINT mask) ++static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList6 *iface, UINT mask) + { + FIXME("iface %p, mask %#x stub!\n", iface, mask); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList6 *iface, + UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, + const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) + { +- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); ++ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct d3d12_resource *resource; + unsigned int i; + +@@ -5956,13 +5983,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap + } + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList6 *iface, + ID3D12ProtectedResourceSession *protected_session) + { + FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList6 *iface, + UINT count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets, + const 
D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags) + { +@@ -5970,74 +5997,78 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsC + count, render_targets, depth_stencil, flags); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList5 *iface) ++static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList6 *iface) + { + FIXME("iface %p stub!\n", iface); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList6 *iface, + ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) + { + FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, + meta_command, parameters_data, (uintptr_t)data_size_in_bytes); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList6 *iface, + ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) + { + FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, + meta_command, parameters_data, (uintptr_t)data_size_in_bytes); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface, + const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT count, + const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs) + { + FIXME("iface %p, desc %p, count %u, postbuild_info_descs %p stub!\n", iface, desc, count, postbuild_info_descs); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(ID3D12GraphicsCommandList5 *iface, +- const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, ++static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo( ++ ID3D12GraphicsCommandList6 *iface, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, + UINT structures_count, const D3D12_GPU_VIRTUAL_ADDRESS *src_structure_data) + { + FIXME("iface %p, desc %p, structures_count %u, src_structure_data %p stub!\n", + iface, desc, structures_count, src_structure_data); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface, +- D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, +- D3D12_GPU_VIRTUAL_ADDRESS src_structure_data, ++static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface, ++ D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, D3D12_GPU_VIRTUAL_ADDRESS src_structure_data, + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode) + { + FIXME("iface %p, dst_structure_data %#"PRIx64", src_structure_data %#"PRIx64", mode %u stub!\n", + iface, dst_structure_data, src_structure_data, mode); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList6 *iface, + ID3D12StateObject *state_object) + { + 
FIXME("iface %p, state_object %p stub!\n", iface, state_object); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList6 *iface, + const D3D12_DISPATCH_RAYS_DESC *desc) + { + FIXME("iface %p, desc %p stub!\n", iface, desc); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList6 *iface, + D3D12_SHADING_RATE rate, const D3D12_SHADING_RATE_COMBINER *combiners) + { + FIXME("iface %p, rate %#x, combiners %p stub!\n", iface, rate, combiners); + } + +-static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList5 *iface, ++static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList6 *iface, + ID3D12Resource *rate_image) + { + FIXME("iface %p, rate_image %p stub!\n", iface, rate_image); + } + +-static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl = ++static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh(ID3D12GraphicsCommandList6 *iface, UINT x, UINT y, UINT z) ++{ ++ FIXME("iface %p, x %u, y %u, z %u stub!\n", iface, x, y, z); ++} ++ ++static const struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl = + { + /* IUnknown methods */ + d3d12_command_list_QueryInterface, +@@ -6128,14 +6159,16 @@ static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl = + /* ID3D12GraphicsCommandList5 methods */ + d3d12_command_list_RSSetShadingRate, + d3d12_command_list_RSSetShadingRateImage, ++ /* ID3D12GraphicsCommandList6 methods */ ++ d3d12_command_list_DispatchMesh, + }; + + static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); +- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); ++ VKD3D_ASSERT(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); ++ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); + } + + static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, +@@ -6144,7 +6177,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d + { + HRESULT hr; + +- list->ID3D12GraphicsCommandList5_iface.lpVtbl = &d3d12_command_list_vtbl; ++ list->ID3D12GraphicsCommandList6_iface.lpVtbl = &d3d12_command_list_vtbl; + list->refcount = 1; + + list->type = type; +@@ -6748,7 +6781,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu + } + + vk_semaphore = fence->timeline_semaphore; +- assert(vk_semaphore); ++ VKD3D_ASSERT(vk_semaphore); + } + else + { +@@ -6821,7 +6854,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu + return hr; + + vk_semaphore = fence->timeline_semaphore; +- assert(vk_semaphore); ++ VKD3D_ASSERT(vk_semaphore); + + return vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, + vk_semaphore, fence, timeline_value, vkd3d_queue); +@@ -6990,7 +7023,7 @@ static HRESULT d3d12_command_queue_wait_locked(struct d3d12_command_queue *comma + * until we have submitted, so the semaphore cannot be destroyed before the call to vkQueueSubmit. 
*/ + vkd3d_mutex_unlock(&fence->mutex); + +- assert(fence->timeline_semaphore); ++ VKD3D_ASSERT(fence->timeline_semaphore); + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; + timeline_submit_info.pNext = NULL; + timeline_submit_info.waitSemaphoreValueCount = 1; +@@ -7254,7 +7287,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * + + queue->is_flushing = true; + +- assert(queue->aux_op_queue.count == 0); ++ VKD3D_ASSERT(queue->aux_op_queue.count == 0); + + while (queue->op_queue.count != 0) + { +@@ -7544,7 +7577,7 @@ struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12Co + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_command_signature_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_signature_vtbl); + return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface); + } + +diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c +index cfc9c5f5ed3..01841c89692 100644 +--- a/libs/vkd3d/libs/vkd3d/device.c ++++ b/libs/vkd3d/libs/vkd3d/device.c +@@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = + VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, + }; + ++/* In general we don't want to enable Vulkan beta extensions, but make an ++ * exception for VK_KHR_portability_subset because we draw no real feature from ++ * it, but it's still useful to be able to develop for MoltenVK without being ++ * spammed with validation errors. */ ++#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME ++#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" ++#endif ++ + static const struct vkd3d_optional_extension_info optional_device_extensions[] = + { + /* KHR extensions */ +@@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), + VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), + VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), ++ VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), + VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), + VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), + VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), +@@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = + VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), + VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), + VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), +- VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), ++ VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), + VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), + VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), +@@ -299,7 +308,7 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio + for (i = 0; i < required_extension_count; ++i) + { + if (!has_extension(extensions, count, required_extensions[i])) +- ERR("Required %s extension %s is not supported.\n", ++ WARN("Required %s extension %s is not supported.\n", + extension_type, debugstr_a(required_extensions[i])); + ++extension_count; + } +@@ -327,12 +336,12 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio + for (i = 0; i < user_extension_count; ++i) + { + if (!has_extension(extensions, count, 
user_extensions[i])) +- ERR("Required user %s extension %s is not supported.\n", ++ WARN("Required user %s extension %s is not supported.\n", + extension_type, debugstr_a(user_extensions[i])); + ++extension_count; + } + +- assert(!optional_user_extension_count || user_extension_supported); ++ VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported); + for (i = 0; i < optional_user_extension_count; ++i) + { + if (has_extension(extensions, count, optional_user_extensions[i])) +@@ -394,7 +403,7 @@ static unsigned int vkd3d_enable_extensions(const char *extensions[], + { + extension_count = vkd3d_append_extension(extensions, extension_count, user_extensions[i]); + } +- assert(!optional_user_extension_count || user_extension_supported); ++ VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported); + for (i = 0; i < optional_user_extension_count; ++i) + { + if (!user_extension_supported[i]) +@@ -575,7 +584,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + + if (!create_info->pfn_signal_event) + { +- ERR("Invalid signal event function pointer.\n"); ++ WARN("Invalid signal event function pointer.\n"); + return E_INVALIDARG; + } + if (!create_info->pfn_create_thread != !create_info->pfn_join_thread) +@@ -585,7 +594,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, + } + if (create_info->wchar_size != 2 && create_info->wchar_size != 4) + { +- ERR("Unexpected WCHAR size %zu.\n", create_info->wchar_size); ++ WARN("Unexpected WCHAR size %zu.\n", create_info->wchar_size); + return E_INVALIDARG; + } + +@@ -822,114 +831,90 @@ struct vkd3d_physical_device_info + VkPhysicalDeviceFeatures2 features2; + }; + +-static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) ++static void vkd3d_chain_physical_device_info_structures(struct vkd3d_physical_device_info *info, ++ struct d3d12_device *device) + { +- const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; +- VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; +- VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; +- VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; +- VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; +- VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; +- VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; +- VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; +- VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; +- VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; +- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; +- VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features; +- VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *mutable_features; +- VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; +- VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; +- VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; +- VkPhysicalDevice physical_device = device->vk_physical_device; +- VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; +- VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; +- VkPhysicalDeviceSubgroupProperties *subgroup_properties; +- +- memset(info, 0, sizeof(*info)); +- 
conditional_rendering_features = &info->conditional_rendering_features; +- depth_clip_features = &info->depth_clip_features; +- descriptor_indexing_features = &info->descriptor_indexing_features; +- fragment_shader_interlock_features = &info->fragment_shader_interlock_features; +- robustness2_features = &info->robustness2_features; +- descriptor_indexing_properties = &info->descriptor_indexing_properties; +- maintenance3_properties = &info->maintenance3_properties; +- demote_features = &info->demote_features; +- buffer_alignment_features = &info->texel_buffer_alignment_features; +- buffer_alignment_properties = &info->texel_buffer_alignment_properties; +- vertex_divisor_features = &info->vertex_divisor_features; +- vertex_divisor_properties = &info->vertex_divisor_properties; +- timeline_semaphore_features = &info->timeline_semaphore_features; +- mutable_features = &info->mutable_features; +- formats4444_features = &info->formats4444_features; +- xfb_features = &info->xfb_features; +- xfb_properties = &info->xfb_properties; +- subgroup_properties = &info->subgroup_properties; + +- info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; ++ info->features2.pNext = NULL; + +- conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; + if (vulkan_info->EXT_conditional_rendering) +- vk_prepend_struct(&info->features2, conditional_rendering_features); +- depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->conditional_rendering_features); + if (vulkan_info->EXT_depth_clip_enable) +- vk_prepend_struct(&info->features2, depth_clip_features); +- descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->depth_clip_features); + if (vulkan_info->EXT_descriptor_indexing) +- vk_prepend_struct(&info->features2, descriptor_indexing_features); +- fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->descriptor_indexing_features); + if (vulkan_info->EXT_fragment_shader_interlock) +- vk_prepend_struct(&info->features2, fragment_shader_interlock_features); +- robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->fragment_shader_interlock_features); + if (vulkan_info->EXT_robustness2) +- vk_prepend_struct(&info->features2, robustness2_features); +- demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->robustness2_features); + if (vulkan_info->EXT_shader_demote_to_helper_invocation) +- vk_prepend_struct(&info->features2, demote_features); +- buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->demote_features); + if (vulkan_info->EXT_texel_buffer_alignment) +- vk_prepend_struct(&info->features2, buffer_alignment_features); +- xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->texel_buffer_alignment_features); + if (vulkan_info->EXT_transform_feedback) +- vk_prepend_struct(&info->features2, xfb_features); +- vertex_divisor_features->sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->xfb_features); + if (vulkan_info->EXT_vertex_attribute_divisor) +- vk_prepend_struct(&info->features2, vertex_divisor_features); +- timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; ++ vk_prepend_struct(&info->features2, &info->vertex_divisor_features); + if (vulkan_info->KHR_timeline_semaphore) +- vk_prepend_struct(&info->features2, timeline_semaphore_features); +- mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->timeline_semaphore_features); + if (vulkan_info->EXT_mutable_descriptor_type) +- vk_prepend_struct(&info->features2, mutable_features); +- formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; ++ vk_prepend_struct(&info->features2, &info->mutable_features); + if (vulkan_info->EXT_4444_formats) +- vk_prepend_struct(&info->features2, formats4444_features); +- +- if (vulkan_info->KHR_get_physical_device_properties2) +- VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); +- else +- VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); ++ vk_prepend_struct(&info->features2, &info->formats4444_features); + +- info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; ++ info->properties2.pNext = NULL; + +- maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; + if (vulkan_info->KHR_maintenance3) +- vk_prepend_struct(&info->properties2, maintenance3_properties); +- descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; ++ vk_prepend_struct(&info->properties2, &info->maintenance3_properties); + if (vulkan_info->EXT_descriptor_indexing) +- vk_prepend_struct(&info->properties2, descriptor_indexing_properties); +- buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; ++ vk_prepend_struct(&info->properties2, &info->descriptor_indexing_properties); + if (vulkan_info->EXT_texel_buffer_alignment) +- vk_prepend_struct(&info->properties2, buffer_alignment_properties); +- xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; ++ vk_prepend_struct(&info->properties2, &info->texel_buffer_alignment_properties); + if (vulkan_info->EXT_transform_feedback) +- vk_prepend_struct(&info->properties2, xfb_properties); +- vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; ++ vk_prepend_struct(&info->properties2, &info->xfb_properties); + if (vulkan_info->EXT_vertex_attribute_divisor) +- vk_prepend_struct(&info->properties2, vertex_divisor_properties); +- subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; ++ vk_prepend_struct(&info->properties2, &info->vertex_divisor_properties); + if (d3d12_device_environment_is_vulkan_min_1_1(device)) +- vk_prepend_struct(&info->properties2, subgroup_properties); ++ vk_prepend_struct(&info->properties2, &info->subgroup_properties); ++} ++ ++static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) ++{ ++ const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; ++ VkPhysicalDevice physical_device = device->vk_physical_device; ++ struct vkd3d_vulkan_info 
*vulkan_info = &device->vk_info; ++ ++ memset(info, 0, sizeof(*info)); ++ ++ info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; ++ info->conditional_rendering_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; ++ info->depth_clip_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; ++ info->descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; ++ info->fragment_shader_interlock_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; ++ info->robustness2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; ++ info->demote_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; ++ info->texel_buffer_alignment_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; ++ info->xfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; ++ info->vertex_divisor_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; ++ info->timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; ++ info->mutable_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; ++ info->formats4444_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; ++ ++ info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; ++ info->maintenance3_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; ++ info->descriptor_indexing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; ++ info->texel_buffer_alignment_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; ++ info->xfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; ++ info->vertex_divisor_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; ++ info->subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; ++ ++ vkd3d_chain_physical_device_info_structures(info, device); ++ ++ if (vulkan_info->KHR_get_physical_device_properties2) ++ VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); ++ else ++ VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); +@@ -1522,7 +1507,7 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct + for (i = 0; i < ARRAY_SIZE(additional_formats); ++i) + { + format = vkd3d_get_format(device, additional_formats[i], false); +- assert(format); ++ VKD3D_ASSERT(format); + + VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties)); + if (!((properties.linearTilingFeatures | properties.optimalTilingFeatures) & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) +@@ -1634,6 +1619,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + + vulkan_info->device_limits = physical_device_info->properties2.properties.limits; + vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; ++ vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; ++ vulkan_info->tessellation_shaders = 
physical_device_info->features2.features.tessellationShader; + vulkan_info->sparse_binding = features->sparseBinding; + vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; + vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; +@@ -1829,6 +1816,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits); + ++ vkd3d_chain_physical_device_info_structures(physical_device_info, device); ++ + return S_OK; + } + +@@ -2166,7 +2155,7 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, + vkd3d_free(extensions); + if (vr < 0) + { +- ERR("Failed to create Vulkan device, vr %d.\n", vr); ++ WARN("Failed to create Vulkan device, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + +@@ -2552,11 +2541,13 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device) + VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); + pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; +- device->vk_pool_count = 2; ++ pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; ++ pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); ++ device->vk_pool_count = 3; + return; + } + +- assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); ++ VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); + pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, + VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); +@@ -3128,8 +3119,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *i + initial_pipeline_state, &object))) + return hr; + +- return return_interface(&object->ID3D12GraphicsCommandList5_iface, +- &IID_ID3D12GraphicsCommandList5, riid, command_list); ++ return return_interface(&object->ID3D12GraphicsCommandList6_iface, ++ &IID_ID3D12GraphicsCommandList6, riid, command_list); + } + + /* Direct3D feature levels restrict which formats can be optionally supported. */ +@@ -3806,7 +3797,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 + return E_INVALIDARG; + } + +- data->UnalignedBlockTexturesSupported = FALSE; ++ /* Vulkan does not restrict block texture alignment. 
*/ ++ data->UnalignedBlockTexturesSupported = TRUE; + + TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); + return S_OK; +@@ -5262,7 +5254,7 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_device_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_device_vtbl); + return impl_from_ID3D12Device9(iface); + } + +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index c897d9f2c5a..ac29088b9cb 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -312,7 +312,7 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface) + + TRACE("%p increasing refcount to %u.\n", heap, refcount); + +- assert(!heap->is_private); ++ VKD3D_ASSERT(!heap->is_private); + + return refcount; + } +@@ -443,7 +443,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_heap_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_heap_vtbl); + return impl_from_ID3D12Heap(iface); + } + +@@ -950,8 +950,8 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, + bool tiled; + HRESULT hr; + +- assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); +- assert(d3d12_resource_validate_desc(desc, device) == S_OK); ++ VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); ++ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); + + if (!desc->MipLevels) + { +@@ -1044,7 +1044,7 @@ static bool d3d12_resource_validate_box(const struct d3d12_resource *resource, + depth = d3d12_resource_desc_get_depth(&resource->desc, mip_level); + + vkd3d_format = resource->format; +- assert(vkd3d_format); ++ VKD3D_ASSERT(vkd3d_format); + width_mask = vkd3d_format->block_width - 1; + height_mask = vkd3d_format->block_height - 1; + +@@ -1162,7 +1162,7 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 + + if (d3d12_resource_is_buffer(resource)) + { +- assert(subresource_count == 1); ++ VKD3D_ASSERT(subresource_count == 1); + + VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); + if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) +@@ -1381,7 +1381,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource2 *iface + + static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) + { +- assert(resource->heap->map_ptr); ++ VKD3D_ASSERT(resource->heap->map_ptr); + return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; + } + +@@ -1771,7 +1771,7 @@ struct d3d12_resource *unsafe_impl_from_ID3D12Resource(ID3D12Resource *iface) + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl); + return impl_from_ID3D12Resource(iface); + } + +@@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d + return false; + } + +- if (align(desc->Width, format->block_width) != desc->Width +- || align(desc->Height, format->block_height) != desc->Height) +- { +- WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", +- desc->Width, desc->Height, desc->Format); +- return false; +- } +- + return true; + } + +@@ -2173,7 +2165,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, + + if (heap_offset > heap->desc.SizeInBytes || 
requirements.size > heap->desc.SizeInBytes - heap_offset) + { +- ERR("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", ++ WARN("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", + heap_offset, requirements.size, heap->desc.SizeInBytes); + return E_INVALIDARG; + } +@@ -2414,7 +2406,7 @@ static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_ + { + struct vkd3d_view *view; + +- assert(magic); ++ VKD3D_ASSERT(magic); + + if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) + { +@@ -2544,7 +2536,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea + writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; + break; + default: +- assert(false); ++ VKD3D_ASSERT(false); + break; + } + if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) +@@ -2733,7 +2725,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struc + { + struct d3d12_desc tmp; + +- assert(dst != src); ++ VKD3D_ASSERT(dst != src); + + tmp.s.u.object = d3d12_desc_get_object_ref(src, device); + descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); +@@ -2756,7 +2748,7 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12 + if (properties->storageTexelBufferOffsetSingleTexelAlignment + && properties->uniformTexelBufferOffsetSingleTexelAlignment) + { +- assert(!vkd3d_format_is_compressed(format)); ++ VKD3D_ASSERT(!vkd3d_format_is_compressed(format)); + return min(format->byte_count, alignment); + } + +@@ -2856,7 +2848,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, + return false; + } + +- assert(d3d12_resource_is_buffer(resource)); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(resource)); + + return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, + format, offset * element_size, size * element_size, view); +@@ -2987,7 +2979,7 @@ static VkComponentSwizzle swizzle_vk_component(const VkComponentMapping *compone + break; + } + +- assert(component != VK_COMPONENT_SWIZZLE_IDENTITY); ++ VKD3D_ASSERT(component != VK_COMPONENT_SWIZZLE_IDENTITY); + return component; + } + +@@ -3519,8 +3511,8 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ + { + const struct vkd3d_format *format; + +- assert(d3d12_resource_is_buffer(counter_resource)); +- assert(desc->u.Buffer.StructureByteStride); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(counter_resource)); ++ VKD3D_ASSERT(desc->u.Buffer.StructureByteStride); + + format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); + if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, +@@ -3640,7 +3632,7 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, + } + + resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); +- assert(d3d12_resource_is_buffer(resource)); ++ VKD3D_ASSERT(d3d12_resource_is_buffer(resource)); + return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, + gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view); + } +@@ -3912,7 +3904,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev + vkd3d_desc.layer_count = resource->desc.DepthOrArraySize; + } + +- assert(d3d12_resource_is_texture(resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); + + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, 
resource->u.vk_image, &vkd3d_desc, &view)) + return; +@@ -3998,7 +3990,7 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev + } + } + +- assert(d3d12_resource_is_texture(resource)); ++ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); + + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) + return; +@@ -4357,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript + return hr; + + descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); +- d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); ++ if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) ++ { ++ vkd3d_private_store_destroy(&descriptor_heap->private_store); ++ return hr; ++ } + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + d3d12_device_add_ref(descriptor_heap->device = device); +@@ -4563,7 +4559,7 @@ struct d3d12_query_heap *unsafe_impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_query_heap_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_query_heap_vtbl); + return impl_from_ID3D12QueryHeap(iface); + } + +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 7197193523d..0bdb7ea524d 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -194,7 +194,7 @@ struct d3d12_root_signature *unsafe_impl_from_ID3D12RootSignature(ID3D12RootSign + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_root_signature_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_root_signature_vtbl); + return impl_from_ID3D12RootSignature(iface); + } + +@@ -345,15 +345,93 @@ struct d3d12_root_signature_info + unsigned int sampler_unbounded_range_count; + + size_t cost; ++ ++ struct d3d12_root_signature_info_range ++ { ++ enum vkd3d_shader_descriptor_type type; ++ unsigned int space; ++ unsigned int base_idx; ++ unsigned int count; ++ D3D12_SHADER_VISIBILITY visibility; ++ } *ranges; ++ size_t range_count, range_capacity; + }; + ++static HRESULT d3d12_root_signature_info_add_range(struct d3d12_root_signature_info *info, ++ enum vkd3d_shader_descriptor_type type, D3D12_SHADER_VISIBILITY visibility, ++ unsigned int space, unsigned int base_idx, unsigned int count) ++{ ++ struct d3d12_root_signature_info_range *range; ++ ++ if (!vkd3d_array_reserve((void **)&info->ranges, &info->range_capacity, info->range_count + 1, ++ sizeof(*info->ranges))) ++ return E_OUTOFMEMORY; ++ ++ range = &info->ranges[info->range_count++]; ++ range->type = type; ++ range->space = space; ++ range->base_idx = base_idx; ++ range->count = count; ++ range->visibility = visibility; ++ ++ return S_OK; ++} ++ ++static int d3d12_root_signature_info_range_compare(const void *a, const void *b) ++{ ++ const struct d3d12_root_signature_info_range *range_a = a, *range_b = b; ++ int ret; ++ ++ if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) ++ return ret; ++ ++ if ((ret = vkd3d_u32_compare(range_a->space, range_b->space))) ++ return ret; ++ ++ return vkd3d_u32_compare(range_a->base_idx, range_b->base_idx); ++} ++ ++static HRESULT d3d12_root_signature_info_range_validate(const struct d3d12_root_signature_info_range *ranges, ++ unsigned int count, D3D12_SHADER_VISIBILITY visibility) ++{ ++ const struct d3d12_root_signature_info_range *range, *next; ++ unsigned int i = 0, j; ++ ++ while (i < count) ++ 
{ ++ range = &ranges[i]; ++ ++ for (j = i + 1; j < count; ++j) ++ { ++ next = &ranges[j]; ++ ++ if (range->visibility != D3D12_SHADER_VISIBILITY_ALL ++ && next->visibility != D3D12_SHADER_VISIBILITY_ALL ++ && range->visibility != next->visibility) ++ continue; ++ ++ if (range->type == next->type && range->space == next->space ++ && range->base_idx + range->count > next->base_idx) ++ return E_INVALIDARG; ++ ++ break; ++ } ++ ++ i = j; ++ } ++ ++ return S_OK; ++} ++ + static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, +- const D3D12_ROOT_DESCRIPTOR_TABLE *table, bool use_array) ++ const D3D12_ROOT_PARAMETER *param, bool use_array) + { + bool cbv_unbounded_range = false, srv_unbounded_range = false, uav_unbounded_range = false; ++ const D3D12_ROOT_DESCRIPTOR_TABLE *table = ¶m->u.DescriptorTable; + bool sampler_unbounded_range = false; + bool unbounded = false; + unsigned int i, count; ++ HRESULT hr; + + for (i = 0; i < table->NumDescriptorRanges; ++i) + { +@@ -381,6 +459,12 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig + } + + count = range->NumDescriptors; ++ ++ if (FAILED(hr = d3d12_root_signature_info_add_range(info, ++ vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType), ++ param->ShaderVisibility, range->RegisterSpace, range->BaseShaderRegister, count))) ++ return hr; ++ + if (range->NumDescriptors == UINT_MAX) + { + unbounded = true; +@@ -453,7 +537,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i + { + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info, +- &p->u.DescriptorTable, use_array))) ++ p, use_array))) + return hr; + ++info->cost; + break; +@@ -463,23 +547,41 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i + ++info->cbv_count; + ++info->binding_count; + info->cost += 2; ++ if (FAILED(hr = d3d12_root_signature_info_add_range(info, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility, ++ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) ++ return hr; + break; ++ + case D3D12_ROOT_PARAMETER_TYPE_SRV: + ++info->root_descriptor_count; + ++info->srv_count; + ++info->binding_count; + info->cost += 2; ++ if (FAILED(hr = d3d12_root_signature_info_add_range(info, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, p->ShaderVisibility, ++ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) ++ return hr; + break; ++ + case D3D12_ROOT_PARAMETER_TYPE_UAV: + ++info->root_descriptor_count; + ++info->uav_count; + ++info->binding_count; + info->cost += 2; ++ if (FAILED(hr = d3d12_root_signature_info_add_range(info, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, p->ShaderVisibility, ++ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) ++ return hr; + break; + + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + ++info->root_constant_count; + info->cost += p->u.Constants.Num32BitValues; ++ if (FAILED(hr = d3d12_root_signature_info_add_range(info, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility, ++ p->u.Constants.RegisterSpace, p->u.Constants.ShaderRegister, 1))) ++ return hr; + break; + + default: +@@ -491,6 +593,30 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i + info->binding_count += desc->NumStaticSamplers; + info->sampler_count += desc->NumStaticSamplers; + ++ for (i = 0; i < desc->NumStaticSamplers; ++i) ++ { ++ const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; ++ ++ if 
(FAILED(hr = d3d12_root_signature_info_add_range(info, ++ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->ShaderVisibility, ++ s->RegisterSpace, s->ShaderRegister, 1))) ++ return hr; ++ } ++ ++ qsort(info->ranges, info->range_count, sizeof(*info->ranges), ++ d3d12_root_signature_info_range_compare); ++ ++ for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i) ++ { ++ if (FAILED(hr = d3d12_root_signature_info_range_validate(info->ranges, info->range_count, i))) ++ return hr; ++ } ++ ++ vkd3d_free(info->ranges); ++ info->ranges = NULL; ++ info->range_count = 0; ++ info->range_capacity = 0; ++ + return S_OK; + } + +@@ -512,7 +638,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) + continue; + +- assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); ++ VKD3D_ASSERT(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); + push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL + : stage_flags_from_visibility(p->ShaderVisibility); + push_constants[p->ShaderVisibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); +@@ -645,7 +771,7 @@ static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_ro + return S_OK; + } + +-static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, ++static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, + bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, + unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) +@@ -670,33 +796,38 @@ static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature * + } + + if (context->unbounded_offset != UINT_MAX) +- d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); ++ return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); ++ ++ return S_OK; + } + +-static uint32_t d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, ++static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, + enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, + unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, +- enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) ++ enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, ++ uint32_t *first_binding) + { +- uint32_t first_binding; + unsigned int i; ++ HRESULT hr; + + is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV + || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + && duplicate_descriptors; + +- first_binding = context->descriptor_binding; ++ *first_binding = context->descriptor_binding; + for (i = 0; i < binding_count; ++i) + { +- if (duplicate_descriptors) +- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, +- base_register_idx + i, true, shader_visibility, 1, context); ++ if (duplicate_descriptors ++ && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, ++ register_space, 
base_register_idx + i, true, shader_visibility, 1, context))) ++ return hr; + +- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, +- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context); ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, ++ base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) ++ return hr; + } +- return first_binding; ++ return S_OK; + } + + static uint32_t vkd3d_descriptor_magic_from_d3d12(D3D12_DESCRIPTOR_RANGE_TYPE type) +@@ -764,6 +895,7 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r + enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + enum vkd3d_shader_descriptor_type descriptor_type = range->type; ++ HRESULT hr; + + if (range->descriptor_count == UINT_MAX) + context->unbounded_offset = range->offset; +@@ -775,8 +907,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r + return E_NOTIMPL; + ++context->current_binding; + +- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, +- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context); ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, ++ range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) ++ return hr; + } + + if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, +@@ -784,8 +917,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r + return E_NOTIMPL; + ++context->current_binding; + +- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, +- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context); ++ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, ++ range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) ++ return hr; + + context->unbounded_offset = UINT_MAX; + +@@ -955,20 +1089,6 @@ static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_r + descriptor_offset, is_buffer, shader_visibility, context); + } + +-static int compare_register_range(const void *a, const void *b) +-{ +- const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; +- int ret; +- +- if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) +- return ret; +- +- if ((ret = vkd3d_u32_compare(range_a->register_space, range_b->register_space))) +- return ret; +- +- return vkd3d_u32_compare(range_a->base_register_idx, range_b->base_register_idx); +-} +- + static int compare_descriptor_range(const void *a, const void *b) + { + const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; +@@ -983,25 +1103,6 @@ static int compare_descriptor_range(const void *a, const void *b) + return (range_a->descriptor_count == UINT_MAX) - (range_b->descriptor_count == UINT_MAX); + } + +-static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descriptor_table_range *ranges, +- unsigned int count) +-{ +- const struct d3d12_root_descriptor_table_range *range, *prev; +- unsigned int i; +- +- for (i = 1; i < count; ++i) +- { +- range = &ranges[i]; +- prev = &ranges[i - 1]; +- +- if (range->type 
== prev->type && range->register_space == prev->register_space +- && range->base_register_idx - prev->base_register_idx < prev->descriptor_count) +- return E_INVALIDARG; +- } +- +- return S_OK; +-} +- + static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, + const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, + struct vkd3d_descriptor_set_context *context) +@@ -1062,10 +1163,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + offset += range->NumDescriptors; + } + +- qsort(table->ranges, range_count, sizeof(*table->ranges), compare_register_range); +- if (FAILED(hr = validate_descriptor_register_ranges(table->ranges, range_count))) +- return hr; +- + qsort(table->ranges, range_count, sizeof(*table->ranges), compare_descriptor_range); + + for (j = 0; j < range_count; ++j) +@@ -1130,9 +1227,10 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo + + cur_binding = context->current_binding; + +- vk_binding = d3d12_root_signature_assign_vk_bindings(root_signature, ++ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, + range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, +- shader_visibility, context); ++ shader_visibility, context, &vk_binding))) ++ return hr; + + /* Unroll descriptor range. */ + for (k = 0; k < range->descriptor_count; ++k) +@@ -1175,6 +1273,7 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign + { + VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; + unsigned int i; ++ HRESULT hr; + + root_signature->push_descriptor_mask = 0; + +@@ -1188,10 +1287,11 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign + + root_signature->push_descriptor_mask |= 1u << i; + +- cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, ++ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, + vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, +- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context); ++ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) ++ return hr; + cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); + cur_binding->descriptorCount = 1; + cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); +@@ -1215,7 +1315,7 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + unsigned int i; + HRESULT hr; + +- assert(root_signature->static_sampler_count == desc->NumStaticSamplers); ++ VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); + for (i = 0; i < desc->NumStaticSamplers; ++i) + { + const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; +@@ -1223,9 +1323,10 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa + if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) + return hr; + +- cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, ++ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, +- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context); 
++ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) ++ return hr; + cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + cur_binding->descriptorCount = 1; + cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); +@@ -1600,7 +1701,7 @@ static HRESULT vkd3d_render_pass_cache_create_pass_locked(struct vkd3d_render_pa + + have_depth_stencil = key->depth_enable || key->stencil_enable; + rt_count = have_depth_stencil ? key->attachment_count - 1 : key->attachment_count; +- assert(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); ++ VKD3D_ASSERT(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + + for (index = 0, attachment_index = 0; index < rt_count; ++index) + { +@@ -2140,7 +2241,7 @@ struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12Pipeline + { + if (!iface) + return NULL; +- assert(iface->lpVtbl == &d3d12_pipeline_state_vtbl); ++ VKD3D_ASSERT(iface->lpVtbl == &d3d12_pipeline_state_vtbl); + return impl_from_ID3D12PipelineState(iface); + } + +@@ -2296,7 +2397,7 @@ static HRESULT d3d12_pipeline_state_init_uav_counters(struct d3d12_pipeline_stat + unsigned int i, j; + HRESULT hr; + +- assert(vkd3d_popcount(stage_flags) == 1); ++ VKD3D_ASSERT(vkd3d_popcount(stage_flags) == 1); + + for (i = 0; i < shader_info->descriptor_count; ++i) + { +@@ -2911,7 +3012,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass( + + if (dsv_format) + { +- assert(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); ++ VKD3D_ASSERT(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); + key.depth_enable = graphics->ds_desc.depthTestEnable; + key.stencil_enable = graphics->ds_desc.stencilTestEnable; + key.depth_stencil_write = graphics->ds_desc.depthWriteEnable +@@ -2928,7 +3029,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass( + if (key.attachment_count != ARRAY_SIZE(key.vk_formats)) + key.vk_formats[ARRAY_SIZE(key.vk_formats) - 1] = VK_FORMAT_UNDEFINED; + for (i = key.attachment_count; i < ARRAY_SIZE(key.vk_formats); ++i) +- assert(key.vk_formats[i] == VK_FORMAT_UNDEFINED); ++ VKD3D_ASSERT(key.vk_formats[i] == VK_FORMAT_UNDEFINED); + + key.padding = 0; + key.sample_count = graphics->ms_desc.rasterizationSamples; +@@ -3476,7 +3577,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s + graphics->ms_desc.pSampleMask = NULL; + if (desc->sample_mask != ~0u) + { +- assert(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); ++ VKD3D_ASSERT(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); + graphics->sample_mask[0] = desc->sample_mask; + graphics->sample_mask[1] = 0xffffffffu; + graphics->ms_desc.pSampleMask = graphics->sample_mask; +@@ -3769,7 +3870,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta + .pDynamicStates = dynamic_states, + }; + +- assert(d3d12_pipeline_state_is_graphics(state)); ++ VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(state)); + + memset(&pipeline_key, 0, sizeof(pipeline_key)); + pipeline_key.topology = topology; +diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c +index 11029c9f5f9..831dc07af56 100644 +--- a/libs/vkd3d/libs/vkd3d/utils.c ++++ b/libs/vkd3d/libs/vkd3d/utils.c +@@ -331,7 +331,7 @@ static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device + + if (j >= current_list->format_count) + { +- assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); ++ 
VKD3D_ASSERT(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); + current_list->vk_formats[current_list->format_count++] = vk_format; + } + } +@@ -427,7 +427,7 @@ static const struct vkd3d_format *vkd3d_get_depth_stencil_format(const struct d3 + const struct vkd3d_format *formats; + unsigned int i; + +- assert(device); ++ VKD3D_ASSERT(device); + formats = device->depth_stencil_formats; + + for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i) +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +index c7431bd821b..9eccec111c7 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c +@@ -38,12 +38,12 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, + } + if (!create_info->instance && !create_info->instance_create_info) + { +- ERR("Instance or instance create info is required.\n"); ++ WARN("Instance or instance create info is required.\n"); + return E_INVALIDARG; + } + if (create_info->instance && create_info->instance_create_info) + { +- ERR("Instance and instance create info are mutually exclusive parameters.\n"); ++ WARN("Instance and instance create info are mutually exclusive parameters.\n"); + return E_INVALIDARG; + } + +@@ -153,7 +153,7 @@ static const D3D12_ROOT_SIGNATURE_DESC * STDMETHODCALLTYPE d3d12_root_signature_ + + TRACE("iface %p.\n", iface); + +- assert(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); ++ VKD3D_ASSERT(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); + return &deserializer->desc.d3d12.u.Desc_1_0; + } + +@@ -354,7 +354,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_Get + } + } + +- assert(deserializer->other_desc.d3d12.Version == version); ++ VKD3D_ASSERT(deserializer->other_desc.d3d12.Version == version); + *desc = &deserializer->other_desc.d3d12; + return S_OK; + } +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index d1fa866d9e3..a4bd2202f39 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -123,6 +123,7 @@ struct vkd3d_vulkan_info + bool KHR_image_format_list; + bool KHR_maintenance2; + bool KHR_maintenance3; ++ bool KHR_portability_subset; + bool KHR_push_descriptor; + bool KHR_sampler_mirror_clamp_to_edge; + bool KHR_timeline_semaphore; +@@ -145,6 +146,8 @@ struct vkd3d_vulkan_info + + bool rasterization_stream; + bool transform_feedback_queries; ++ bool geometry_shaders; ++ bool tessellation_shaders; + + bool uav_read_without_format; + +@@ -784,8 +787,8 @@ extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; + static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + VkDescriptorType type) + { +- assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); +- assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); ++ VKD3D_ASSERT(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); ++ VKD3D_ASSERT(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); + + return vk_descriptor_set_index_table[type]; + } +@@ -1229,7 +1232,7 @@ enum vkd3d_pipeline_bind_point + /* ID3D12CommandList */ + struct d3d12_command_list + { +- ID3D12GraphicsCommandList5 ID3D12GraphicsCommandList5_iface; ++ ID3D12GraphicsCommandList6 ID3D12GraphicsCommandList6_iface; + unsigned int refcount; + + D3D12_COMMAND_LIST_TYPE type; +@@ -1753,7 +1756,6 @@ static inline void vk_prepend_struct(void *header, void *structure) + { + 
VkBaseOutStructure *vk_header = header, *vk_structure = structure; + +- assert(!vk_structure->pNext); + vk_structure->pNext = vk_header->pNext; + vk_header->pNext = vk_structure; + } +@@ -1766,7 +1768,7 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) + const void *next; + } *vkd3d_header = header, *vkd3d_structure = structure; + +- assert(!vkd3d_structure->next); ++ VKD3D_ASSERT(!vkd3d_structure->next); + vkd3d_structure->next = vkd3d_header->next; + vkd3d_header->next = vkd3d_structure; + } +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch deleted file mode 100644 index f8153c0f..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-c792114a6a58c7c97abf827d154d7ecd22d.patch +++ /dev/null @@ -1,8054 +0,0 @@ -From 76210e02ea314444cf5b3003913807711d0b206d Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 7 Mar 2024 10:40:41 +1100 -Subject: [PATCH] Updated vkd3d to c792114a6a58c7c97abf827d154d7ecd22d81536. - ---- - libs/vkd3d/include/private/vkd3d_common.h | 6 +- - libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 25 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 616 ++++++++----- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 99 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 536 +++++++++-- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 6 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 118 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 99 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 7 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 858 ++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 689 ++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 175 ++-- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + - libs/vkd3d/libs/vkd3d-shader/spirv.c | 229 +++-- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 349 +++++-- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 30 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 13 +- - libs/vkd3d/libs/vkd3d/command.c | 141 +-- - libs/vkd3d/libs/vkd3d/device.c | 16 +- - libs/vkd3d/libs/vkd3d/resource.c | 14 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 3 + - 22 files changed, 3073 insertions(+), 958 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index a9d709d10fe..e7b25602ec0 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -52,6 +52,10 @@ - ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ - | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) - -+#define VKD3D_EXPAND(x) x -+#define VKD3D_STRINGIFY(x) #x -+#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) -+ - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') - #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -@@ -233,7 +237,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index f60ef7db769..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -20,6 +20,7 @@ - #define WIDL_C_INLINE_WRAPPERS - #endif - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include 
"vkd3d_blob.h" -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 9abc2c4db70..8a3eb5a367a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_PHASE ] = "phase", - [VKD3DSIH_PHI ] = "phi", - [VKD3DSIH_POW ] = "pow", -+ [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", -+ [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", -+ [VKD3DSIH_QUAD_READ_ACROSS_Y ] = "quad_read_across_y", -+ [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", - [VKD3DSIH_RCP ] = "rcp", - [VKD3DSIH_REP ] = "rep", - [VKD3DSIH_RESINFO ] = "resinfo", -@@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - { - bool untyped = false; - -- switch (compiler->current->handler_idx) -+ switch (compiler->current->opcode) - { - case VKD3DSIH_MOV: - case VKD3DSIH_MOVC: -@@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_BREAKP: - case VKD3DSIH_CONTINUEP: -@@ -1853,8 +1857,13 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - break; - - case VKD3DSIH_TEX: -- if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) -- vkd3d_string_buffer_printf(buffer, "p"); -+ if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) -+ { -+ if (ins->flags & VKD3DSI_TEXLD_PROJECT) -+ vkd3d_string_buffer_printf(buffer, "p"); -+ else if (ins->flags & VKD3DSI_TEXLD_BIAS) -+ vkd3d_string_buffer_printf(buffer, "b"); -+ } - break; - - case VKD3DSIH_WAVE_OP_ADD: -@@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - if (ins->coissue) - vkd3d_string_buffer_printf(buffer, "+"); - -- shader_print_opcode(compiler, ins->handler_idx); -+ shader_print_opcode(compiler, ins->opcode); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: -@@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_ENDIF: -@@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - - shader_dump_instruction(&compiler, ins); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_IF: -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index bfd5b52b436..4522d56c5c9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) - - static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { -- if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) -+ if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) - { - vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, - "Ignoring unexpected instruction flags %#x.", ins->flags); -@@ -1142,23 +1142,23 @@ static void 
shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - goto fail; - } - -- if (ins->handler_idx == VKD3DSIH_DCL) -+ if (ins->opcode == VKD3DSIH_DCL) - { - shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); - } -- else if (ins->handler_idx == VKD3DSIH_DEF) -+ else if (ins->opcode == VKD3DSIH_DEF) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); - shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } -- else if (ins->handler_idx == VKD3DSIH_DEFB) -+ else if (ins->opcode == VKD3DSIH_DEFB) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); - shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } -- else if (ins->handler_idx == VKD3DSIH_DEFI) -+ else if (ins->opcode == VKD3DSIH_DEFI) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); -@@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - return; - - fail: -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - *ptr = sm1->end; - } - -@@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(&sm1, ins); - -- if (ins->handler_idx == VKD3DSIH_INVALID) -+ if (ins->opcode == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - vsir_program_cleanup(program); -@@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - return ret; - } - --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -+bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) - { - unsigned int i; - -@@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - unsigned int offset; - } - register_table[] = - { -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, -- {"vpos", false, 
VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, -+ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) - && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type -- && ctx->profile->major_version == register_table[i].major_version) -+ && version->type == register_table[i].shader_type -+ && version->major == register_table[i].major_version) - { - *type = 
register_table[i].type; -- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) -+ if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) - *reg = register_table[i].offset; - else -- *reg = semantic->index; -+ *reg = semantic_index; - return true; - } - } -@@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - return false; - } - --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -+bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) - { - static const struct - { -@@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { -- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) -+ if (!ascii_strcasecmp(semantic_name, semantics[i].name)) - { - *usage = semantics[i].usage; -- *usage_idx = semantic->index; -+ *usage_idx = semantic_index; - return true; - } - } -@@ -1465,6 +1466,17 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU - return false; - } - -+struct d3dbc_compiler -+{ -+ struct vsir_program *program; -+ struct vkd3d_bytecode_buffer buffer; -+ struct vkd3d_shader_message_context *message_context; -+ -+ /* OBJECTIVE: Store all the required information in the other fields so -+ * that this hlsl_ctx is no longer necessary. */ -+ struct hlsl_ctx *ctx; -+}; -+ - static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) - { - if (type == VKD3D_SHADER_TYPE_VERTEX) -@@ -1497,13 +1509,16 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPC_OBJECT; -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - -@@ -1593,13 +1608,16 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPT_VERTEXSHADER; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - -@@ -1677,8 +1695,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) - list_move_tail(&ctx->extern_vars, &sorted); - } - --static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- struct hlsl_ir_function_decl *entry_func) -+void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) - { - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; -@@ -1739,11 +1756,11 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - } - else - { -- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); - put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ 
-- put_u32(buffer, 0); /* FIXME: default value */ -+ put_u32(buffer, 0); /* default value */ - } - } - -@@ -1767,6 +1784,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); -+ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset; -+ enum hlsl_regset regset; -+ -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ union -+ { -+ uint32_t u; -+ float f; -+ } uni; -+ -+ switch (comp_type->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ uni.u = 0; -+ break; -+ -+ case HLSL_TYPE_INT: -+ uni.f = var->default_values[k].value.i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ uni.f = var->default_values[k].value.u; -+ break; -+ -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ uni.u = var->default_values[k].value.u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); -+ } -+ } -+ } -+ - ++uniform_count; - } - } -@@ -1778,7 +1851,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); - } - --static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -+static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) - { - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -@@ -1791,7 +1864,7 @@ struct sm1_instruction - - struct sm1_dst_register - { -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; -@@ -1799,16 +1872,42 @@ struct sm1_instruction - - struct sm1_src_register - { -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; -- } srcs[3]; -+ } srcs[4]; - unsigned int src_count; - - unsigned int has_dst; - }; - -+static bool is_inconsequential_instr(const struct sm1_instruction *instr) -+{ -+ const struct sm1_src_register *src = &instr->srcs[0]; -+ const struct sm1_dst_register *dst = &instr->dst; -+ unsigned int i; -+ -+ if (instr->opcode != D3DSIO_MOV) -+ return false; -+ if (dst->mod != D3DSPDM_NONE) -+ return false; -+ if (src->mod != D3DSPSM_NONE) -+ return false; -+ if (src->type != dst->type) -+ return false; -+ if (src->reg != dst->reg) -+ return false; -+ -+ for (i = 0; i < 4; ++i) -+ { -+ if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) -+ return false; -+ } -+ -+ return true; -+} -+ - static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct 
sm1_dst_register *reg) - { - assert(reg->writemask); -@@ -1821,15 +1920,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); - } - --static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct sm1_instruction *instr) -+static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - uint32_t token = instr->opcode; - unsigned int i; - -+ if (is_inconsequential_instr(instr)) -+ return; -+ - token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); - -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -1845,54 +1948,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); - } - --static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, -- const struct hlsl_reg *src3) -+static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) - { - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, -- .srcs[2].type = D3DSPR_TEMP, -+ .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, -- const struct hlsl_reg *src2, const struct hlsl_reg *src3) -+static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, -- .srcs[2].type = D3DSPR_TEMP, -+ .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - 
.srcs[2].reg = src3->id, - .src_count = 3, -@@ -1901,26 +2003,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, -@@ -1928,49 +2029,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -+static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src, -+ D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = 
VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, -@@ -1978,16 +2078,16 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - - /* Narrowing casts were already lowered. */ - assert(src_type->dimx == dst_type->dimx); -@@ -2004,7 +2104,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_DOUBLE: -@@ -2028,7 +2128,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_BOOL: -@@ -2057,8 +2157,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - --static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) -@@ -2067,12 +2170,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { -- .type = D3DSPR_CONST, -+ .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = constant_reg->index, - }; - -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2082,32 +2185,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - } - } - --static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_var *var, bool output) -+static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, -+ const struct signature_element *element, bool output) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - -- if ((!output && !var->last_read) || (output && !var->first_write)) -- return; -- -- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) -+ if 
(hlsl_sm1_register_from_semantic(version, element->semantic_name, -+ element->semantic_index, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { -- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); -+ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); - assert(ret); -- reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; -- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; -+ reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ reg.reg = element->register_index; - } - - token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2116,39 +2219,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - -- reg.writemask = (1 << var->data_type->dimx) - 1; -+ reg.writemask = element->mask; - write_sm1_dst_register(buffer, ®); - } - --static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) - { -+ struct vsir_program *program = d3dbc->program; -+ const struct vkd3d_shader_version *version; - bool write_in = false, write_out = false; -- struct hlsl_ir_var *var; - -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) -+ version = &program->shader_version; -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) - write_in = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) - write_in = write_out = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) - write_in = true; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ if (write_in) -+ { -+ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); -+ } -+ -+ if (write_out) - { -- if (write_in && var->is_input_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, false); -- if (write_out && var->is_output_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, true); -+ for (unsigned int i = 0; i < program->output_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); - } - } - --static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, - unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, res_type = 0; - - token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2175,20 +2286,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu - token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; - put_u32(buffer, token); - -- reg.type = D3DSPR_SAMPLER; -+ reg.type = VKD3DSPR_COMBINED_SAMPLER; - reg.writemask = VKD3DSP_WRITEMASK_ALL; - 
reg.reg = reg_id; - - write_sm1_dst_register(buffer, ®); - } - --static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - enum hlsl_sampler_dim sampler_dim; - unsigned int i, count, reg_id; - struct hlsl_ir_var *var; - -- if (ctx->profile->major_version < 2) -+ if (version->major < 2) - return; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -@@ -2210,27 +2323,26 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - continue; - } - -- reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; -- write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); -+ reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; -+ d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); - } - } - } - } - --static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_CONST, -+ .srcs[0].type = VKD3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, -@@ -2239,10 +2351,10 @@ static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - assert(instr->reg.allocated); - assert(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) - { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -@@ -2255,28 +2367,30 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); -- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); -+ d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); - } - } - --static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - - assert(instr->reg.allocated); - - if (expr->op == HLSL_OP1_REINTERPRET) - { -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - return; - } - - if (expr->op == 
HLSL_OP1_CAST) - { -- write_sm1_cast(ctx, buffer, instr); -+ d3dbc_write_cast(d3dbc, instr); - return; - } - -@@ -2290,70 +2404,70 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - switch (expr->op) - { - case HLSL_OP1_ABS: -- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSX: -- write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSY: -- write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_LOG2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); - break; - - case HLSL_OP1_NEG: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); - break; - - case HLSL_OP2_ADD: -- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: -@@ -2362,27 +2476,27 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - break; - - case HLSL_OP2_LOGIC_AND: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, 
&arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_LOGIC_OR: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_SLT: -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) - hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); -- write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP3_CMP: -- if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ if (version->type == VKD3D_SHADER_TYPE_VERTEX) - hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); -- write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_DP2ADD: -- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: -@@ -2391,10 +2505,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - --static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block); -+static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); - --static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_if *iff = hlsl_ir_if(instr); - const struct hlsl_ir_node *condition; -@@ -2408,33 +2521,33 @@ static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf - .opcode = D3DSIO_IFC, - .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[0].reg = condition->reg.id, - .srcs[0].mod = 0, - -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[1].reg = condition->reg.id, - .srcs[1].mod = D3DSPSM_NEG, - - .src_count = 2, - }; -- write_sm1_instruction(ctx, buffer, &sm1_ifc); -- write_sm1_block(ctx, buffer, &iff->then_block); -+ d3dbc_write_instruction(d3dbc, &sm1_ifc); -+ d3dbc_write_block(d3dbc, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; -- write_sm1_instruction(ctx, buffer, &sm1_else); -- write_sm1_block(ctx, buffer, &iff->else_block); -+ d3dbc_write_instruction(d3dbc, &sm1_else); -+ d3dbc_write_block(d3dbc, &iff->else_block); - } - - sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; -- write_sm1_instruction(ctx, buffer, &sm1_endif); -+ d3dbc_write_instruction(d3dbc, &sm1_endif); - } - --static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - -@@ -2448,35 +2561,36 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - { - .opcode = D3DSIO_TEXKILL, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg->id, - .dst.writemask = reg->writemask, - .has_dst = 1, - }; - -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - break; - } - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - } - } - --static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, -@@ -2487,15 +2601,15 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - if (load->src.var->is_uniform) - { - assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_CONST; -+ sm1_instr.srcs[0].type = VKD3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { -- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, -- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) -+ if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, -+ load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { - assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_INPUT; -+ sm1_instr.srcs[0].type = VKD3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - 
else -@@ -2503,32 +2617,34 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - struct hlsl_ir_node *coords = load->coords.node; -+ struct hlsl_ir_node *ddx = load->ddx.node; -+ struct hlsl_ir_node *ddy = load->ddy.node; - unsigned int sampler_offset, reg_id; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - struct sm1_instruction sm1_instr; - - sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); -- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; -+ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; - - sm1_instr = (struct sm1_instruction) - { -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = coords->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), - -- .srcs[1].type = D3DSPR_SAMPLER, -+ .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, - .srcs[1].reg = reg_id, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), - -@@ -2546,6 +2662,25 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; - break; - -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ sm1_instr.opcode = D3DSIO_TEX; -+ sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ sm1_instr.opcode = D3DSIO_TEXLDD; -+ -+ sm1_instr.srcs[2].type = VKD3DSPR_TEMP; -+ sm1_instr.srcs[2].reg = ddx->reg.id; -+ sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); -+ -+ sm1_instr.srcs[3].type = VKD3DSPR_TEMP; -+ sm1_instr.srcs[3].reg = ddy->reg.id; -+ sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); -+ -+ sm1_instr.src_count += 2; -+ break; -+ - default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; -@@ -2553,25 +2688,26 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - - assert(instr->reg.allocated); - -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct hlsl_ir_store *store = hlsl_ir_store(instr); -- const struct hlsl_ir_node *rhs = store->rhs.node; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); -+ const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ 
.dst.type = VKD3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, -@@ -2585,16 +2721,16 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - - if (store->lhs.var->is_output_semantic) - { -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) - { -- sm1_instr.dst.type = D3DSPR_TEMP; -+ sm1_instr.dst.type = VKD3DSPR_TEMP; - sm1_instr.dst.reg = 0; - } -- else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, -- true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) -+ else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, -+ store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { - assert(reg.allocated); -- sm1_instr.dst.type = D3DSPR_OUTPUT; -+ sm1_instr.dst.type = VKD3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else -@@ -2604,11 +2740,10 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - assert(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; -@@ -2616,12 +2751,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), -@@ -2631,12 +2766,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - assert(instr->reg.allocated); - assert(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block) -+static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) - { -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -2656,38 +2791,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: -- write_sm1_constant(ctx, buffer, instr); -+ d3dbc_write_constant(d3dbc, instr); - break; - - case HLSL_IR_EXPR: -- write_sm1_expr(ctx, buffer, instr); -+ d3dbc_write_expr(d3dbc, instr); - break; - - case HLSL_IR_IF: - if (hlsl_version_ge(ctx, 2, 1)) -- write_sm1_if(ctx, buffer, instr); -+ 
d3dbc_write_if(d3dbc, instr); - else - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - break; - - case HLSL_IR_JUMP: -- write_sm1_jump(ctx, buffer, instr); -+ d3dbc_write_jump(d3dbc, instr); - break; - - case HLSL_IR_LOAD: -- write_sm1_load(ctx, buffer, instr); -+ d3dbc_write_load(d3dbc, instr); - break; - - case HLSL_IR_RESOURCE_LOAD: -- write_sm1_resource_load(ctx, buffer, instr); -+ d3dbc_write_resource_load(d3dbc, instr); - break; - - case HLSL_IR_STORE: -- write_sm1_store(ctx, buffer, instr); -+ d3dbc_write_store(d3dbc, instr); - break; - - case HLSL_IR_SWIZZLE: -- write_sm1_swizzle(ctx, buffer, instr); -+ d3dbc_write_swizzle(d3dbc, instr); - break; - - default: -@@ -2696,32 +2831,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - } - } - --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -+ * data from the other parameters instead, so it can be removed as an argument -+ * and be declared in vkd3d_shader_private.h and used without relying on HLSL -+ * IR structs. */ -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- struct vkd3d_bytecode_buffer buffer = {0}; -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ struct d3dbc_compiler d3dbc = {0}; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; -+ -+ d3dbc.ctx = ctx; -+ d3dbc.program = program; -+ d3dbc.message_context = message_context; - -- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -+ put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - -- write_sm1_uniforms(ctx, &buffer, entry_func); -+ bytecode_put_bytes(buffer, ctab->code, ctab->size); - -- write_sm1_constant_defs(ctx, &buffer); -- write_sm1_semantic_dcls(ctx, &buffer); -- write_sm1_sampler_dcls(ctx, &buffer); -- write_sm1_block(ctx, &buffer, &entry_func->body); -+ d3dbc_write_constant_defs(&d3dbc); -+ d3dbc_write_semantic_dcls(&d3dbc); -+ d3dbc_write_sampler_dcls(&d3dbc); -+ d3dbc_write_block(&d3dbc, &entry_func->body); - -- put_u32(&buffer, D3DSIO_END); -+ put_u32(buffer, D3DSIO_END); - -- if (buffer.status) -- ctx->result = buffer.status; -+ if (buffer->status) -+ ctx->result = buffer->status; - - if (!ctx->result) - { -- out->code = buffer.data; -- out->size = buffer.size; -+ out->code = buffer->data; -+ out->size = buffer->size; - } - else - { -- vkd3d_free(buffer.data); -+ vkd3d_free(buffer->data); - } - return ctx->result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 73a8d8687c5..2176debc7d2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -458,6 +458,8 @@ enum dx_intrinsic_opcode - DX_WAVE_ACTIVE_OP = 119, - DX_WAVE_ACTIVE_BIT = 120, - DX_WAVE_PREFIX_OP = 121, -+ DX_QUAD_READ_LANE_AT = 122, -+ DX_QUAD_OP = 123, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, - DX_WAVE_ALL_BIT_COUNT = 135, -@@ -576,6 +578,13 @@ enum dxil_wave_op_kind - WAVE_OP_MAX = 3, - }; - -+enum dxil_quad_op_kind -+{ -+ QUAD_READ_ACROSS_X = 0, -+ QUAD_READ_ACROSS_Y = 1, -+ QUAD_READ_ACROSS_D = 2, 
-+}; -+ - struct sm6_pointer_info - { - const struct sm6_type *type; -@@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - for (i = 0; i < sm6->p.program->instructions.count; ++i) - { - ins = &sm6->p.program->instructions.elements[i]; -- if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) -+ if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) - { - ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( - (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); - } -- else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) -+ else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) - { - ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); - } -- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) -+ else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) - { - ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); - ins->flags = 0; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) -+ else if (ins->opcode == VKD3DSIH_DCL_TGSM_STRUCTURED) - { - ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); - ins->flags = 0; -@@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record - code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, -@@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co - return VKD3DSIH_IMAX; - case DX_IMIN: - return VKD3DSIH_IMIN; -+ case DX_QUAD_READ_LANE_AT: -+ return VKD3DSIH_QUAD_READ_LANE_AT; - case DX_UMAX: - return VKD3DSIH_UMAX; - case DX_UMIN: -@@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); - - /* NOP is used to flag no instruction emitted. 
*/ -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -@@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr - sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); - } - -+static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) -+{ -+ switch (op) -+ { -+ case QUAD_READ_ACROSS_X: -+ return VKD3DSIH_QUAD_READ_ACROSS_X; -+ case QUAD_READ_ACROSS_Y: -+ return VKD3DSIH_QUAD_READ_ACROSS_Y; -+ case QUAD_READ_ACROSS_D: -+ return VKD3DSIH_QUAD_READ_ACROSS_D; -+ default: -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_opcode opcode; -+ enum dxil_quad_op_kind quad_op; -+ -+ quad_op = sm6_value_get_constant_uint(operands[1]); -+ if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) -+ { -+ FIXME("Unhandled quad op kind %u.\n", quad_op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Quad op kind %u is unhandled.", quad_op); -+ return; -+ } -+ -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, - [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, - [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, -+ [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, -+ [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, - [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -6381,7 +6435,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade - { - const struct sm6_type *type; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - - if (!dst->type) - return; -@@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor - { - *dst = *value; - dst->type = type; -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - return; - } - -@@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - * do not otherwise occur, so deleting these avoids the need for backend support. 
*/ - if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) - { -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - *dst = *a; - return; - } -@@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record - reg->idx_count = 2; - dst->structure_stride = src->structure_stride; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record - incoming[j].block = sm6_function_get_block(function, record->operands[i + 1], sm6); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - - qsort(incoming, phi->incoming_count, sizeof(*incoming), phi_incoming_compare); - -@@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record - - code_block->terminator.type = TERMINATOR_RET; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec - terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - } - - ins = &code_block->instructions[code_block->instruction_count]; -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - - dst = sm6_parser_get_current_value(sm6); - fwd_type = dst->type; -@@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - - if (sm6->p.failed) - return VKD3D_ERROR; -- assert(ins->handler_idx != VKD3DSIH_INVALID); - - if (record->attachment) - metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); -@@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; - } - if (code_block) -- code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; -- else -- assert(ins->handler_idx == VKD3DSIH_NOP); -+ code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; - - if (dst->type && fwd_type && dst->type != fwd_type) - { -@@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - - if (!m) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; - ins->declaration.raw_resource.resource.reg.write_mask = 0; - return &ins->declaration.raw_resource.resource; - } -@@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - "A typed resource has no data type."); - } - -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; -+ ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; - for (i = 0; i < VKD3D_VEC4_SIZE; ++i) - ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; - ins->declaration.semantic.resource_type = resource_type; -@@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - } - else if (kind == RESOURCE_KIND_RAWBUFFER) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; - ins->declaration.raw_resource.resource.reg.write_mask = 0; - - return &ins->declaration.raw_resource.resource; - } - else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; - ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; - ins->declaration.structured_resource.resource.reg.write_mask = 0; - -@@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, - d->kind = kind; - d->reg_type = VKD3DSPR_RESOURCE; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; -- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) -+ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) - ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - - init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); -@@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, - d->kind = values[0]; - d->reg_type = VKD3DSPR_UAV; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; -- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) -+ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) - ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - - init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 57b4ac24212..bd2ad1290cd 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -56,6 +56,70 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) - vkd3d_free(string_entry); - } - -+struct state_block_function_info -+{ -+ const char *name; -+ unsigned int min_args, max_args; -+}; -+ -+static const struct state_block_function_info *get_state_block_function_info(const char *name) -+{ -+ static const struct state_block_function_info valid_functions[] = -+ { -+ {"SetBlendState", 3, 3}, -+ {"SetDepthStencilState", 2, 2}, -+ {"SetRasterizerState", 1, 1}, -+ {"SetVertexShader", 1, 1}, -+ {"SetDomainShader", 1, 1}, -+ {"SetHullShader", 1, 1}, -+ {"SetGeometryShader", 1, 1}, -+ {"SetPixelShader", 1, 1}, -+ {"SetComputeShader", 1, 1}, -+ {"OMSetRenderTargets", 2, 9}, -+ }; -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE(valid_functions); ++i) -+ { -+ if (!strcmp(name, valid_functions[i].name)) -+ return &valid_functions[i]; -+ } -+ return NULL; -+} -+ -+bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, -+ const struct vkd3d_shader_location *loc) -+{ -+ if (entry->is_function_call) -+ { -+ const struct state_block_function_info *info = get_state_block_function_info(entry->name); -+ -+ if (!info) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid state block function '%s'.", entry->name); -+ return false; -+ } -+ if (entry->args_count < info->min_args || entry->args_count > info->max_args) -+ { -+ if (info->min_args == info->max_args) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid argument count for state block function '%s' (expected %u).", -+ entry->name, info->min_args); -+ } -+ else -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid argument count for state block function '%s' (expected from %u to %u).", -+ entry->name, info->min_args, info->max_args); -+ } -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - struct fx_write_context; - - struct fx_write_context_ops -@@ -63,6 +127,7 @@ struct fx_write_context_ops - uint32_t (*write_string)(const char *string, struct fx_write_context *fx); - void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); - void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); -+ void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); - bool are_child_effects_supported; - }; - -@@ -94,6 +159,8 @@ struct fx_write_context - uint32_t texture_count; - uint32_t uav_count; - uint32_t sampler_state_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t rasterizer_state_count; - int status; - - bool child_effect; -@@ -128,8 +195,41 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - fx->ops->write_pass(var, fx); - } - -+static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) -+{ -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_var *v; -+ uint32_t count = 0; -+ -+ if (!scope) -+ return 0; -+ -+ LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (!v->default_values) -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ 
"Annotation variable is missing default value."); -+ -+ fx->ops->write_annotation(v, fx); -+ ++count; -+ } -+ -+ return count; -+} -+ -+static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset, count; -+ -+ count_offset = put_u32(buffer, 0); -+ count = write_annotations(scope, fx); -+ set_u32(buffer, count_offset, count); -+} -+ - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); - static const char * get_fx_4_type_name(const struct hlsl_type *type); -+static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); - - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -@@ -279,9 +379,9 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); - put_u32(buffer, 0); /* Assignment count. */ -- put_u32(buffer, 0); /* Annotation count. */ - -- /* TODO: annotations */ -+ write_fx_4_annotations(var->annotations, fx); -+ - /* TODO: assignments */ - } - -@@ -402,6 +502,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - return uav_type_names[type->sampler_dim]; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ return "DepthStencilState"; -+ - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - return "DepthStencilView"; - -@@ -421,10 +524,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -+ struct field_offsets -+ { -+ uint32_t name; -+ uint32_t semantic; -+ uint32_t offset; -+ uint32_t type; -+ }; -+ uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -- uint32_t name_offset, offset, size, stride, numeric_desc; -+ struct field_offsets *field_offsets = NULL; -+ struct hlsl_ctx *ctx = fx->ctx; - uint32_t elements_count = 0; - const char *name; -+ size_t i; - - /* Resolve arrays to element type and number of elements. 
*/ - if (type->class == HLSL_CLASS_ARRAY) -@@ -436,6 +549,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - name = get_fx_4_type_name(type); - - name_offset = write_string(name, fx); -+ if (type->class == HLSL_CLASS_STRUCT) -+ { -+ if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) -+ return 0; -+ -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ -+ field_offsets[i].name = write_string(field->name, fx); -+ field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); -+ field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; -+ field_offsets[i].type = write_type(field->type, fx); -+ } -+ } -+ - offset = put_u32_unaligned(buffer, name_offset); - - switch (type->class) -@@ -446,8 +575,10 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, 1); - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: -@@ -464,6 +595,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_CONSTANT_BUFFER: - vkd3d_unreachable(); - - case HLSL_CLASS_STRING: -@@ -473,34 +605,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - return 0; - } - -- size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ /* Structures can only contain numeric fields, this is validated during variable declaration. */ -+ total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ packed_size = 0; -+ if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) -+ packed_size = hlsl_type_component_count(type) * sizeof(float); - if (elements_count) -- size *= elements_count; -+ { -+ total_size *= elements_count; -+ packed_size *= elements_count; -+ } - stride = align(stride, 4 * sizeof(float)); - - put_u32_unaligned(buffer, elements_count); -- put_u32_unaligned(buffer, size); /* Total size. */ -- put_u32_unaligned(buffer, stride); /* Stride. 
*/ -- put_u32_unaligned(buffer, size); -+ put_u32_unaligned(buffer, total_size); -+ put_u32_unaligned(buffer, stride); -+ put_u32_unaligned(buffer, packed_size); - - if (type->class == HLSL_CLASS_STRUCT) - { -- size_t i; -- - put_u32_unaligned(buffer, type->e.record.field_count); - for (i = 0; i < type->e.record.field_count; ++i) - { -- const struct hlsl_struct_field *field = &type->e.record.fields[i]; -- uint32_t semantic_offset, field_type_offset; -+ const struct field_offsets *field = &field_offsets[i]; - -- name_offset = write_string(field->name, fx); -- semantic_offset = write_string(field->semantic.name, fx); -- field_type_offset = write_type(field->type, fx); -+ put_u32_unaligned(buffer, field->name); -+ put_u32_unaligned(buffer, field->semantic); -+ put_u32_unaligned(buffer, field->offset); -+ put_u32_unaligned(buffer, field->type); -+ } - -- put_u32_unaligned(buffer, name_offset); -- put_u32_unaligned(buffer, semantic_offset); -- put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); -- put_u32_unaligned(buffer, field_type_offset); -+ if (ctx->profile->major_version == 5) -+ { -+ put_u32_unaligned(buffer, 0); /* Base class type */ -+ put_u32_unaligned(buffer, 0); /* Interface count */ - } - } - else if (type->class == HLSL_CLASS_TEXTURE) -@@ -556,6 +694,14 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - put_u32_unaligned(buffer, 6); - } -+ else if (type->class == HLSL_CLASS_RASTERIZER_STATE) -+ { -+ put_u32_unaligned(buffer, 4); -+ } -+ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) -+ { -+ put_u32_unaligned(buffer, 3); -+ } - else if (hlsl_is_numeric_type(type)) - { - numeric_desc = get_fx_4_numeric_type_description(type, fx); -@@ -565,9 +711,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - FIXME("Type %u is not supported.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -- return 0; - } - -+ vkd3d_free(field_offsets); - return offset; - } - -@@ -581,8 +727,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); - count_offset = put_u32(buffer, 0); -- put_u32(buffer, 0); /* Annotation count. */ -+ write_fx_4_annotations(var->annotations, fx); - -+ count = 0; - LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) - { - write_pass(pass, fx); -@@ -617,7 +764,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) - - put_u32(buffer, name_offset); - count_offset = put_u32(buffer, 0); /* Technique count */ -- put_u32(buffer, 0); /* Annotation count */ -+ write_fx_4_annotations(var ? var->annotations : NULL, fx); - - count = fx->technique_count; - write_techniques(var ? var->scope : fx->ctx->globals, fx); -@@ -683,7 +830,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - } - - name_offset = write_string(name, fx); -- semantic_offset = write_string(semantic->name, fx); -+ semantic_offset = semantic->raw_name ? 
write_string(semantic->raw_name, fx) : 0; - - offset = put_u32(buffer, hlsl_sm1_base_type(type)); - put_u32(buffer, hlsl_sm1_class(type)); -@@ -794,6 +941,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: - /* FIXME: write actual initial value */ -+ if (var->default_values) -+ hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); -+ - offset = put_u32(buffer, 0); - - for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) -@@ -850,8 +1000,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); - return false; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_VOID: - return false; -@@ -859,6 +1011,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_CONSTANT_BUFFER: - /* This cannot appear as an extern variable. */ - break; - } -@@ -972,9 +1125,72 @@ static const struct fx_write_context_ops fx_4_ops = - .write_string = write_fx_4_string, - .write_technique = write_fx_4_technique, - .write_pass = write_fx_4_pass, -+ .write_annotation = write_fx_4_annotation, - .are_child_effects_supported = true, - }; - -+static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, -+ struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); -+ uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t offset = buffer->size; -+ unsigned int comp_count; -+ -+ if (!value) -+ return 0; -+ -+ comp_count = hlsl_type_component_count(type); -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ { -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ -+ for (j = 0; j < comp_count; ++j) -+ { -+ put_u32_unaligned(buffer, value->value.u); -+ value++; -+ } -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", -+ type->e.numeric.type); -+ } -+ -+ break; -+ } -+ case HLSL_CLASS_STRUCT: -+ { -+ struct hlsl_struct_field *fields = type->e.record.fields; -+ -+ for (j = 0; j < type->e.record.field_count; ++j) -+ { -+ write_fx_4_default_value(fields[i].type, value, fx); -+ value += hlsl_type_component_count(fields[i].type); -+ } -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); -+ } -+ } -+ -+ return offset; -+} -+ - static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -@@ -984,22 +1200,20 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - { - HAS_EXPLICIT_BIND_POINT = 0x4, - }; -- struct hlsl_ctx *ctx = fx->ctx; - -- /* Explicit bind point. 
*/ -- if (var->reg_reservation.reg_type) -+ if (var->has_explicit_bind_point) - flags |= HAS_EXPLICIT_BIND_POINT; - - type_offset = write_type(var->data_type, fx); - name_offset = write_string(var->name, fx); -- semantic_offset = write_string(var->semantic.name, fx); -+ semantic_offset = write_string(var->semantic.raw_name, fx); - - put_u32(buffer, name_offset); - put_u32(buffer, type_offset); - - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ -- put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ -- value_offset = put_u32(buffer, 0); /* Default value offset */ -+ put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. */ -+ value_offset = put_u32(buffer, 0); - put_u32(buffer, flags); /* Flags */ - - if (shared) -@@ -1008,17 +1222,39 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - } - else - { -- /* FIXME: write default value */ -- set_u32(buffer, value_offset, 0); -+ uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); -+ set_u32(buffer, value_offset, offset); - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); -+ write_fx_4_annotations(var->annotations, fx); - - fx->numeric_variable_count++; - } - } - -+static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t name_offset, type_offset, offset; -+ struct hlsl_ctx *ctx = fx->ctx; -+ -+ name_offset = write_string(var->name, fx); -+ type_offset = write_type(var->data_type, fx); -+ -+ put_u32(buffer, name_offset); -+ put_u32(buffer, type_offset); -+ -+ if (hlsl_is_numeric_type(type)) -+ { -+ offset = write_fx_4_default_value(var->data_type, var->default_values, fx); -+ put_u32(buffer, offset); -+ } -+ else -+ { -+ hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); -+ } -+} -+ - struct rhs_named_value - { - const char *name; -@@ -1118,6 +1354,9 @@ static bool state_block_contains_state(const char *name, unsigned int start, str - - for (i = start; i < block->count; ++i) - { -+ if (block->entries[i]->is_function_call) -+ continue; -+ - if (!ascii_strcasecmp(block->entries[i]->name, name)) - return true; - } -@@ -1160,6 +1399,41 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no - return true; - } - -+static void fold_state_value(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry) -+{ -+ bool progress; -+ -+ do -+ { -+ progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -+ progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -+ } while (progress); -+} -+ -+enum state_property_component_type -+{ -+ FX_BOOL, -+ FX_FLOAT, -+ FX_UINT, -+ FX_UINT8, -+}; -+ -+static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case FX_BOOL: -+ return HLSL_TYPE_BOOL; -+ case FX_FLOAT: -+ return HLSL_TYPE_FLOAT; -+ case FX_UINT: -+ case FX_UINT8: -+ return HLSL_TYPE_UINT; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ - static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, - struct fx_write_context *fx) - { -@@ -1209,37 +1483,112 @@ static void 
resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - { NULL } - }; - -+ static const struct rhs_named_value depth_write_mask_values[] = -+ { -+ { "ZERO", 0 }, -+ { "ALL", 1 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value comparison_values[] = -+ { -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value stencil_op_values[] = -+ { -+ { "KEEP", 1 }, -+ { "ZERO", 2 }, -+ { "REPLACE", 3 }, -+ { "INCR_SAT", 4 }, -+ { "DECR_SAT", 5 }, -+ { "INVERT", 6 }, -+ { "INCR", 7 }, -+ { "DECR", 8 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value fill_values[] = -+ { -+ { "WIREFRAME", 2 }, -+ { "SOLID", 3 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value cull_values[] = -+ { -+ { "NONE", 1 }, -+ { "FRONT", 2 }, -+ { "BACK", 3 }, -+ { NULL } -+ }; -+ - static const struct state - { - const char *name; - enum hlsl_type_class container; -- enum hlsl_base_type type; -+ enum hlsl_type_class class; -+ enum state_property_component_type type; - unsigned int dimx; - uint32_t id; - const struct rhs_named_value *values; - } - states[] = - { -- { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, -- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, -- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, -- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, -- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, -- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, -- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, -- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, -- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, -- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, -+ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 12, fill_values }, -+ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 13, cull_values }, -+ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 14 }, -+ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 15 }, -+ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 16 }, -+ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 17 }, -+ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 18 }, -+ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 19 }, -+ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 20 }, -+ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 21 }, -+ -+ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 22 }, -+ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 23, depth_write_mask_values }, -+ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 24, comparison_values }, -+ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 25 }, -+ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 26 }, -+ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 27 }, -+ { "FrontFaceStencilFail", 
HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 28, stencil_op_values }, -+ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 29, stencil_op_values }, -+ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 30, stencil_op_values }, -+ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 31, comparison_values }, -+ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 32, stencil_op_values }, -+ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 33, stencil_op_values }, -+ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 34, stencil_op_values }, -+ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 35, comparison_values }, -+ -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 54 }, - /* TODO: "Texture" field */ - }; - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - struct replace_state_context replace_context; -+ struct hlsl_type *state_type = NULL; - struct hlsl_ir_node *node, *cast; - const struct state *state = NULL; - struct hlsl_ctx *ctx = fx->ctx; -- struct hlsl_type *state_type; -+ enum hlsl_base_type base_type; - unsigned int i; -- bool progress; - - for (i = 0; i < ARRAY_SIZE(states); ++i) - { -@@ -1269,28 +1618,54 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - replace_context.values = state->values; - replace_context.var = var; - -- /* Turned named constants to actual constants. */ -+ /* Turn named constants to actual constants. */ - hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); -+ fold_state_value(ctx, entry); - -- if (state->dimx) -- state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); -- else -- state_type = hlsl_get_scalar_type(ctx, state->type); -- -- /* Cast to expected property type. */ -- node = entry->args->node; -- if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -- return; -- list_add_after(&node->entry, &cast->entry); -+ /* Now cast and run folding again. 
*/ - -- hlsl_src_remove(entry->args); -- hlsl_src_from_node(entry->args, cast); -+ base_type = hlsl_type_from_fx_type(state->type); -+ switch (state->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); -+ break; -+ case HLSL_CLASS_SCALAR: -+ state_type = hlsl_get_scalar_type(ctx, base_type); -+ break; -+ case HLSL_CLASS_TEXTURE: -+ hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); -+ break; -+ default: -+ ; -+ } - -- do -+ if (state_type) - { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -- } while (progress); -+ node = entry->args->node; -+ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -+ return; -+ list_add_after(&node->entry, &cast->entry); -+ -+ /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. */ -+ if (state->type == FX_UINT8) -+ { -+ struct hlsl_ir_node *mask; -+ -+ if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) -+ return; -+ list_add_after(&cast->entry, &mask->entry); -+ -+ if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) -+ return; -+ list_add_after(&mask->entry, &cast->entry); -+ } -+ -+ hlsl_src_remove(entry->args); -+ hlsl_src_from_node(entry->args, cast); -+ -+ fold_state_value(ctx, entry); -+ } - } - - static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -1344,7 +1719,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - - type_offset = write_type(var->data_type, fx); - name_offset = write_string(var->name, fx); -- semantic_offset = write_string(var->semantic.name, fx); -+ semantic_offset = write_string(var->semantic.raw_name, fx); - - put_u32(buffer, name_offset); - put_u32(buffer, type_offset); -@@ -1383,19 +1758,27 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - fx->dsv_count += elements_count; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->depth_stencil_state_count += elements_count; -+ break; -+ - case HLSL_CLASS_SAMPLER: - write_fx_4_state_object_initializer(var, fx); - fx->sampler_state_count += elements_count; - break; - -+ case HLSL_CLASS_RASTERIZER_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->rasterizer_state_count += elements_count; -+ break; -+ - default: - hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", - type->e.numeric.type); - } - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); -+ write_fx_4_annotations(var->annotations, fx); - - ++fx->object_variable_count; - } -@@ -1438,9 +1821,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - } - else - { -- put_u32(buffer, 0); /* Annotations count */ -- if (b->annotations) -- hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); -+ write_fx_4_annotations(b->annotations, fx); - ++fx->buffer_count; - } - -@@ -1464,6 +1845,9 @@ static void write_buffers(struct fx_write_context *fx, bool shared) - { - struct hlsl_buffer *buffer; - -+ if (shared && !fx->child_effect) -+ return; -+ - LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) - { - if (!buffer->size && !fx->include_empty_buffers) -@@ -1483,8 +1867,10 
@@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: -@@ -1551,9 +1937,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ - put_u32(&buffer, fx.texture_count); -- put_u32(&buffer, 0); /* Depth stencil state count. */ -+ put_u32(&buffer, fx.depth_stencil_state_count); - put_u32(&buffer, 0); /* Blend state count. */ -- put_u32(&buffer, 0); /* Rasterizer state count. */ -+ put_u32(&buffer, fx.rasterizer_state_count); - put_u32(&buffer, fx.sampler_state_count); - put_u32(&buffer, fx.rtv_count); - put_u32(&buffer, fx.dsv_count); -@@ -1609,9 +1995,9 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ - put_u32(&buffer, fx.texture_count); -- put_u32(&buffer, 0); /* Depth stencil state count. */ -+ put_u32(&buffer, fx.depth_stencil_state_count); - put_u32(&buffer, 0); /* Blend state count. */ -- put_u32(&buffer, 0); /* Rasterizer state count. */ -+ put_u32(&buffer, fx.rasterizer_state_count); - put_u32(&buffer, fx.sampler_state_count); - put_u32(&buffer, fx.rtv_count); - put_u32(&buffer, fx.dsv_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 3e482a5fc70..8725724a239 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -48,9 +48,9 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne - static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { - shader_glsl_print_indent(&gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); -+ vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->opcode); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); -+ "Internal compiler error: Unhandled instruction %#x.", ins->opcode); - } - - static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, -@@ -74,7 +74,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator - { - generator->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 99214fba6de..acf50869a40 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -167,6 +167,8 @@ void hlsl_free_var(struct hlsl_ir_var *decl) - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - vkd3d_free((void *)decl->objects_usage[k]); - -+ vkd3d_free(decl->default_values); -+ - for (i = 0; i < decl->state_block_count; ++i) - hlsl_free_state_block(decl->state_blocks[i]); - vkd3d_free(decl->state_blocks); -@@ -367,15 +369,18 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - type->reg_size[HLSL_REGSET_UAVS] = 1; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case 
HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - } -@@ -435,11 +440,13 @@ static bool type_is_single_component(const struct hlsl_type *type) - { - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -@@ -450,6 +457,7 @@ static bool type_is_single_component(const struct hlsl_type *type) - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_CONSTANT_BUFFER: - return false; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -528,6 +536,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - } - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ { -+ *type_ptr = type->e.resource.format; -+ return traverse_path_from_component_index(ctx, type_ptr, index_ptr); -+ } -+ - default: - vkd3d_unreachable(); - } -@@ -556,12 +570,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - - switch (type->class) - { -- case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -- case HLSL_CLASS_MATRIX: - offset[HLSL_REGSET_NUMERIC] += idx; - break; - -+ case HLSL_CLASS_MATRIX: -+ offset[HLSL_REGSET_NUMERIC] += 4 * idx; -+ break; -+ - case HLSL_CLASS_STRUCT: - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - offset[r] += type->e.record.fields[idx].reg_offset[r]; -@@ -577,8 +593,10 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - } - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -592,6 +610,8 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_CONSTANT_BUFFER: - vkd3d_unreachable(); - } - type = next_type; -@@ -865,6 +885,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim - return type; - } - -+struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) -+ return NULL; -+ type->class = HLSL_CLASS_CONSTANT_BUFFER; -+ type->dimy = 1; -+ type->e.resource.format = format; -+ hlsl_type_calculate_reg_size(ctx, type); -+ list_add_tail(&ctx->types, &type->entry); -+ return type; -+} -+ - static const char * get_case_insensitive_typename(const char *name) - { - static const char *const names[] = -@@ -956,8 +990,13 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ return hlsl_type_component_count(type->e.resource.format); -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case 
HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -1038,10 +1077,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - case HLSL_CLASS_TECHNIQUE: - return t1->e.version == t2->e.version; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_VERTEX_SHADER: -@@ -1247,6 +1291,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha - list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); - else - list_add_tail(&ctx->globals->vars, &var->scope_entry); -+ var->is_synthetic = true; - } - return var; - } -@@ -1765,7 +1810,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - } - - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, const struct vkd3d_shader_location *loc) -+ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, -+ unsigned int unroll_limit, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_loop *loop; - -@@ -1774,6 +1820,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); - hlsl_block_init(&loop->body); - hlsl_block_add_block(&loop->body, block); -+ -+ loop->unroll_type = unroll_type; -+ loop->unroll_limit = unroll_limit; - return &loop->node; - } - -@@ -1836,9 +1885,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct - return map->instrs[i].dst; - } - -- /* The block passed to hlsl_clone_block() should have been free of external -- * references. 
*/ -- vkd3d_unreachable(); -+ return src; - } - - static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, -@@ -1935,7 +1982,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ - if (!clone_block(ctx, &body, &src->body, map)) - return NULL; - -- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) -+ if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) - { - hlsl_block_cleanup(&body); - return NULL; -@@ -2407,10 +2454,21 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - return string; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ vkd3d_string_buffer_printf(string, "ConstantBuffer"); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } -+ return string; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -2735,6 +2793,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - static const char *const op_names[] = - { - [HLSL_OP0_VOID] = "void", -+ [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", - - [HLSL_OP1_ABS] = "abs", - [HLSL_OP1_BIT_NOT] = "~", -@@ -3086,6 +3145,33 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - vkd3d_string_buffer_cleanup(&buffer); - } - -+void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) -+{ -+ unsigned int k, component_count = hlsl_type_component_count(var->data_type); -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ if (!var->default_values) -+ { -+ vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ return; -+ } -+ -+ vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); -+ for (k = 0; k < component_count; ++k) -+ { -+ if (k % 4 == 0) -+ vkd3d_string_buffer_printf(&buffer, "\n "); -+ vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].value.u); -+ } -+ vkd3d_string_buffer_printf(&buffer, "\n"); -+ -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+} -+ - void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) - { - struct hlsl_src *src, *next; -@@ -3319,9 +3405,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) - void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) - { - vkd3d_free((void *)semantic->name); -+ vkd3d_free((void *)semantic->raw_name); - memset(semantic, 0, sizeof(*semantic)); - } - -+bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) -+{ -+ *dst = *src; -+ dst->name = dst->raw_name = NULL; -+ if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) -+ return false; -+ if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) -+ { -+ hlsl_cleanup_semantic(dst); -+ return false; -+ } -+ -+ return true; -+} -+ - static void free_function_decl(struct hlsl_ir_function_decl *decl) - { - unsigned int i; -@@ -3712,9 +3814,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - - ctx->builtin_types.Void = 
hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 27814f3a56f..5832958712a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -78,10 +78,12 @@ enum hlsl_type_class - HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, - HLSL_CLASS_STRUCT, - HLSL_CLASS_ARRAY, -+ HLSL_CLASS_DEPTH_STENCIL_STATE, - HLSL_CLASS_DEPTH_STENCIL_VIEW, - HLSL_CLASS_EFFECT_GROUP, - HLSL_CLASS_PASS, - HLSL_CLASS_PIXEL_SHADER, -+ HLSL_CLASS_RASTERIZER_STATE, - HLSL_CLASS_RENDER_TARGET_VIEW, - HLSL_CLASS_SAMPLER, - HLSL_CLASS_STRING, -@@ -89,6 +91,7 @@ enum hlsl_type_class - HLSL_CLASS_TEXTURE, - HLSL_CLASS_UAV, - HLSL_CLASS_VERTEX_SHADER, -+ HLSL_CLASS_CONSTANT_BUFFER, - HLSL_CLASS_VOID, - }; - -@@ -222,6 +225,8 @@ struct hlsl_semantic - const char *name; - uint32_t index; - -+ /* Name exactly as it appears in the sources. */ -+ const char *raw_name; - /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ - bool reported_missing; - /* In case the variable or field that stores this semantic has already reported to use a -@@ -259,8 +264,20 @@ struct hlsl_struct_field - * struct. */ - struct hlsl_reg - { -- /* Index of the first register allocated. */ -+ /* Register number of the first register allocated. */ - uint32_t id; -+ /* For descriptors (buffer, texture, sampler, UAV) this is the base binding -+ * index of the descriptor. -+ * For 5.1 and above descriptors have space and may be arrayed, in which -+ * case the array shares a single register ID but has a range of register -+ * indices, and "id" and "index" are as a rule not equal. -+ * For versions below 5.1, the register number for descriptors is the same -+ * as its external binding index, so only "index" is used, and "id" is -+ * ignored. -+ * For numeric registers "index" is not used. */ -+ uint32_t index; -+ /* Register space of a descriptor. Not used for numeric registers. */ -+ uint32_t space; - /* Number of registers to be allocated. - * Unlike the variable's type's regsize, it is not expressed in register components, but rather - * in whole registers, and may depend on which components are used within the shader. 
*/ -@@ -371,6 +388,7 @@ struct hlsl_attribute - #define HLSL_STORAGE_LINEAR 0x00010000 - #define HLSL_MODIFIER_SINGLE 0x00020000 - #define HLSL_MODIFIER_EXPORT 0x00040000 -+#define HLSL_STORAGE_ANNOTATION 0x00080000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -396,6 +414,14 @@ struct hlsl_reg_reservation - unsigned int offset_index; - }; - -+union hlsl_constant_value_component -+{ -+ uint32_t u; -+ int32_t i; -+ float f; -+ double d; -+}; -+ - struct hlsl_ir_var - { - struct hlsl_type *data_type; -@@ -418,6 +444,15 @@ struct hlsl_ir_var - /* Scope that contains annotations for this variable. */ - struct hlsl_scope *annotations; - -+ /* Array of default values the variable was initialized with, one for each component. -+ * Only for variables that need it, such as uniforms and variables inside constant buffers. -+ * This pointer is NULL for others. */ -+ struct hlsl_default_value -+ { -+ /* Default value, in case the component is a numeric value. */ -+ union hlsl_constant_value_component value; -+ } *default_values; -+ - /* A dynamic array containing the state block on the variable's declaration, if any. - * An array variable may contain multiple state blocks. - * A technique pass will always contain one. -@@ -460,6 +495,8 @@ struct hlsl_ir_var - uint32_t is_uniform : 1; - uint32_t is_param : 1; - uint32_t is_separated_resource : 1; -+ uint32_t is_synthetic : 1; -+ uint32_t has_explicit_bind_point : 1; - }; - - /* This struct is used to represent assignments in state block entries: -@@ -470,22 +507,31 @@ struct hlsl_ir_var - * name[lhs_index] = args[0] - * - or - - * name[lhs_index] = {args[0], args[1], ...}; -+ * -+ * This struct also represents function call syntax: -+ * name(args[0], args[1], ...) - */ - struct hlsl_state_block_entry - { -- /* For assignments, the name in the lhs. */ -+ /* Whether this entry is a function call. */ -+ bool is_function_call; -+ -+ /* For assignments, the name in the lhs. -+ * For functions, the name of the function. */ - char *name; - /* Resolved format-specific property identifier. */ - unsigned int name_id; - -- /* Whether the lhs in the assignment is indexed and, in that case, its index. */ -+ /* For assignments, whether the lhs of an assignment is indexed and, in -+ * that case, its index. */ - bool lhs_has_index; - unsigned int lhs_index; - -- /* Instructions present in the rhs. */ -+ /* Instructions present in the rhs or the function arguments. */ - struct hlsl_block *instrs; - -- /* For assignments, arguments of the rhs initializer. */ -+ /* For assignments, arguments of the rhs initializer. -+ * For function calls, the arguments themselves. 
*/ - struct hlsl_src *args; - unsigned int args_count; - }; -@@ -556,12 +602,21 @@ struct hlsl_ir_if - struct hlsl_block else_block; - }; - -+enum hlsl_ir_loop_unroll_type -+{ -+ HLSL_IR_LOOP_UNROLL, -+ HLSL_IR_LOOP_FORCE_UNROLL, -+ HLSL_IR_LOOP_FORCE_LOOP -+}; -+ - struct hlsl_ir_loop - { - struct hlsl_ir_node node; - /* loop condition is stored in the body (as "if (!condition) break;") */ - struct hlsl_block body; - unsigned int next_index; /* liveness index of the end of the loop */ -+ unsigned int unroll_limit; -+ enum hlsl_ir_loop_unroll_type unroll_type; - }; - - struct hlsl_ir_switch_case -@@ -583,6 +638,7 @@ struct hlsl_ir_switch - enum hlsl_ir_expr_op - { - HLSL_OP0_VOID, -+ HLSL_OP0_RASTERIZER_SAMPLE_COUNT, - - HLSL_OP1_ABS, - HLSL_OP1_BIT_NOT, -@@ -775,13 +831,7 @@ struct hlsl_ir_constant - struct hlsl_ir_node node; - struct hlsl_constant_value - { -- union hlsl_constant_value_component -- { -- uint32_t u; -- int32_t i; -- float f; -- double d; -- } u[4]; -+ union hlsl_constant_value_component u[4]; - } value; - /* Constant register of type 'c' where the constant value is stored for SM1. */ - struct hlsl_reg reg; -@@ -811,6 +861,8 @@ struct hlsl_scope - bool loop; - /* The scope was created for the switch statement. */ - bool _switch; -+ /* The scope contains annotation variables. */ -+ bool annotations; - }; - - struct hlsl_profile_info -@@ -1249,6 +1301,10 @@ void hlsl_block_cleanup(struct hlsl_block *block); - bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -+void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); -+ -+bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, -+ const struct vkd3d_shader_location *loc); - - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -@@ -1259,7 +1315,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); - - void hlsl_cleanup_deref(struct hlsl_deref *deref); -+ - void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); -+bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); - - void hlsl_cleanup_ir_switch_cases(struct list *cases); - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); -@@ -1342,7 +1400,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); - struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, const struct vkd3d_shader_location *loc); -+ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -@@ -1361,6 +1419,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ - unsigned int sample_count); - struct hlsl_type 
*hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct hlsl_type *format, bool rasteriser_ordered); -+struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); - struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, -@@ -1432,10 +1491,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); - D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); -+bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -+bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); -+ -+void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - - bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index a5923d8bf8e..55993dac2b4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -49,7 +49,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); - RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum - RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public - RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try --RESERVED4 typename|union|unsigned|using|virtual -+RESERVED4 typename|union|using|virtual - - WS [ \t] - NEWLINE (\n)|(\r\n) -@@ -164,6 +164,7 @@ textureCUBE {return KW_TEXTURECUBE; } - TextureCubeArray {return KW_TEXTURECUBEARRAY; } - true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } -+unsigned {return KW_UNSIGNED; } - uniform {return KW_UNIFORM; } - vector {return KW_VECTOR; } - VertexShader {return KW_VERTEXSHADER; } -@@ -197,7 +198,9 @@ while {return KW_WHILE; } - struct hlsl_ctx *ctx = yyget_extra(yyscanner); - - yylval->name = hlsl_strdup(ctx, yytext); -- if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) -+ if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) -+ return KW_CONSTANTBUFFER; -+ else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) - return VAR_IDENTIFIER; - else if (hlsl_get_type(ctx->cur_scope, yytext, true, true)) - return TYPE_IDENTIFIER; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y 
b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 9c1bdef926d..7b058a65bc1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -573,12 +573,91 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); - } - -+static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) -+{ -+ union hlsl_constant_value_component ret = {0}; -+ struct hlsl_ir_constant *constant; -+ struct hlsl_ir_node *node; -+ struct hlsl_block expr; -+ struct hlsl_src src; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (node->type) -+ { -+ case HLSL_IR_CONSTANT: -+ case HLSL_IR_EXPR: -+ case HLSL_IR_SWIZZLE: -+ case HLSL_IR_LOAD: -+ case HLSL_IR_INDEX: -+ continue; -+ case HLSL_IR_STORE: -+ if (hlsl_ir_store(node)->lhs.var->is_synthetic) -+ break; -+ /* fall-through */ -+ case HLSL_IR_CALL: -+ case HLSL_IR_IF: -+ case HLSL_IR_LOOP: -+ case HLSL_IR_JUMP: -+ case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_RESOURCE_STORE: -+ case HLSL_IR_SWITCH: -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Expected literal expression."); -+ break; -+ } -+ } -+ -+ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -+ return ret; -+ hlsl_block_add_block(&expr, block); -+ -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) -+ { -+ hlsl_block_cleanup(&expr); -+ return ret; -+ } -+ -+ /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ -+ hlsl_src_from_node(&src, node_from_block(&expr)); -+ hlsl_run_const_passes(ctx, &expr); -+ node = src.node; -+ hlsl_src_remove(&src); -+ -+ if (node->type == HLSL_IR_CONSTANT) -+ { -+ constant = hlsl_ir_constant(node); -+ ret = constant->value.u[0]; -+ } -+ else -+ { -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Failed to evaluate constant expression."); -+ } -+ -+ hlsl_block_cleanup(&expr); -+ -+ return ret; -+} -+ -+static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) -+{ -+ union hlsl_constant_value_component res; -+ -+ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ return res.u; -+} -+ - static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, - struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; -+ unsigned int i, unroll_limit = 0; - struct hlsl_ir_node *loop; -- unsigned int i; - - if (attribute_list_has_duplicates(attributes)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -@@ -591,18 +670,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - const struct hlsl_attribute *attr = attributes->attrs[i]; - if (!strcmp(attr->name, "unroll")) - { -- if (attr->args_count) -+ if (attr->args_count > 1) - { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); -+ hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, -+ "Ignoring 'unroll' attribute with more than 1 argument."); -+ continue; - } -- else -+ -+ if (attr->args_count == 1) - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); -+ struct hlsl_block expr; -+ hlsl_block_init(&expr); -+ if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) -+ return NULL; -+ -+ unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); -+ hlsl_block_cleanup(&expr); - } -+ -+ unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; - } - else if (!strcmp(attr->name, "loop")) - { -- /* TODO: this attribute will be used to disable unrolling, once it's implememented. 
*/ -+ unroll_type = HLSL_IR_LOOP_FORCE_LOOP; - } - else if (!strcmp(attr->name, "fastopt") - || !strcmp(attr->name, "allow_uav_condition")) -@@ -631,7 +721,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - else - list_move_head(&body->instrs, &cond->instrs); - -- if (!(loop = hlsl_new_loop(ctx, body, loc))) -+ if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) - goto oom; - hlsl_block_add_instr(init, loop); - -@@ -1013,6 +1103,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); - } - } -+ -+ if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) -+ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); -+ - vkd3d_free(v->arrays.sizes); - field->loc = v->loc; - field->name = v->name; -@@ -1210,12 +1304,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl - return true; - } - --static bool parse_reservation_index(const char *string, char *type, uint32_t *index) -+static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, -+ struct hlsl_reg_reservation *reservation) - { -- if (!sscanf(string + 1, "%u", index)) -- return false; -+ char *endptr; -+ -+ reservation->reg_type = ascii_tolower(string[0]); -+ -+ /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. */ -+ if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) -+ { -+ bracket_offset = 0; -+ } -+ -+ if (string[1] == '\0') -+ { -+ reservation->reg_index = bracket_offset; -+ return true; -+ } -+ -+ reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; -+ -+ if (*endptr) -+ { -+ /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, -+ * setting index to -1. It will later fail while validating slot limits. */ -+ if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) -+ { -+ reservation->reg_index = -1; -+ return true; -+ } -+ -+ /* All other types tolerate leftover characters. 
*/ -+ if (endptr == string + 1) -+ return false; -+ } - -- *type = ascii_tolower(string[0]); - return true; - } - -@@ -1286,72 +1410,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return block; - } - --static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -- const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_constant *constant; -- struct hlsl_ir_node *node; -- struct hlsl_block expr; -- unsigned int ret = 0; -- struct hlsl_src src; -- -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -- { -- switch (node->type) -- { -- case HLSL_IR_CONSTANT: -- case HLSL_IR_EXPR: -- case HLSL_IR_SWIZZLE: -- case HLSL_IR_LOAD: -- case HLSL_IR_INDEX: -- continue; -- case HLSL_IR_CALL: -- case HLSL_IR_IF: -- case HLSL_IR_LOOP: -- case HLSL_IR_JUMP: -- case HLSL_IR_RESOURCE_LOAD: -- case HLSL_IR_RESOURCE_STORE: -- case HLSL_IR_STORE: -- case HLSL_IR_SWITCH: -- case HLSL_IR_STATEBLOCK_CONSTANT: -- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Expected literal expression."); -- } -- } -- -- if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -- return 0; -- hlsl_block_add_block(&expr, block); -- -- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -- { -- hlsl_block_cleanup(&expr); -- return 0; -- } -- -- /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ -- hlsl_src_from_node(&src, node_from_block(&expr)); -- hlsl_run_const_passes(ctx, &expr); -- node = src.node; -- hlsl_src_remove(&src); -- -- if (node->type == HLSL_IR_CONSTANT) -- { -- constant = hlsl_ir_constant(node); -- ret = constant->value.u[0].u; -- } -- else -- { -- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Failed to evaluate constant expression."); -- } -- -- hlsl_block_cleanup(&expr); -- -- return ret; --} -- - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { - /* Scalar vars can be converted to pretty much everything */ -@@ -1862,12 +1920,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned - return true; - } - -+static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) -+{ -+ /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. -+ * components are indexed by their sources. i.e. the first component comes from the first -+ * component of the rhs. */ -+ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; -+ -+ /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ -+ for (i = 0; i < 4; ++i) -+ { -+ if (*writemask & (1 << i)) -+ { -+ unsigned int s = (*swizzle >> (i * 8)) & 0xff; -+ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int idx = x + y * 4; -+ new_swizzle |= s << (bit++ * 8); -+ if (new_writemask & (1 << idx)) -+ return false; -+ new_writemask |= 1 << idx; -+ } -+ } -+ width = bit; -+ -+ /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the -+ * incoming vector. 
*/ -+ bit = 0; -+ for (i = 0; i < 16; ++i) -+ { -+ for (j = 0; j < width; ++j) -+ { -+ unsigned int s = (new_swizzle >> (j * 8)) & 0xff; -+ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int idx = x + y * 4; -+ if (idx == i) -+ inverted |= j << (bit++ * 2); -+ } -+ } -+ -+ *swizzle = inverted; -+ *writemask = new_writemask; -+ *ret_width = width; -+ return true; -+} -+ - static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_node *copy; -- unsigned int writemask = 0; -+ unsigned int writemask = 0, width = 0; -+ bool matrix_writemask = false; - - if (assign_op == ASSIGN_OP_SUB) - { -@@ -1885,7 +1988,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - } - - if (hlsl_is_numeric_type(lhs_type)) -+ { - writemask = (1 << lhs_type->dimx) - 1; -+ width = lhs_type->dimx; -+ } - - if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; -@@ -1902,12 +2008,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); - struct hlsl_ir_node *new_swizzle; - uint32_t s = swizzle->swizzle; -- unsigned int width; - -- if (lhs->data_type->class == HLSL_CLASS_MATRIX) -- hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); -+ assert(!matrix_writemask); - -- if (!invert_swizzle(&s, &writemask, &width)) -+ if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) -+ { -+ if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) -+ { -+ hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); -+ return NULL; -+ } -+ if (!invert_swizzle_matrix(&s, &writemask, &width)) -+ { -+ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); -+ return NULL; -+ } -+ matrix_writemask = true; -+ } -+ else if (!invert_swizzle(&s, &writemask, &width)) - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return NULL; -@@ -1955,7 +2073,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - -- if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) -+ if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - -@@ -1971,12 +2089,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&resource_deref); - } -+ else if (matrix_writemask) -+ { -+ struct hlsl_deref deref; -+ unsigned int i, j, k = 0; -+ -+ hlsl_init_deref_from_index_chain(ctx, &deref, lhs); -+ -+ for (i = 0; i < lhs->data_type->dimy; ++i) -+ { -+ for (j = 0; j < lhs->data_type->dimx; ++j) -+ { -+ struct hlsl_ir_node *load; -+ struct hlsl_block store_block; -+ const unsigned int idx = i * 4 + j; -+ const unsigned int component = i * lhs->data_type->dimx + j; -+ -+ if (!(writemask & (1 << idx))) -+ continue; -+ -+ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &deref, 
component, load)) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ hlsl_block_add_block(block, &store_block); -+ } -+ } -+ -+ hlsl_cleanup_deref(&deref); -+ } - else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) - { - struct hlsl_ir_index *row = hlsl_ir_index(lhs); - struct hlsl_ir_node *mat = row->val.node; - unsigned int i, k = 0; - -+ assert(!matrix_writemask); -+ - for (i = 0; i < mat->data_type->dimx; ++i) - { - struct hlsl_ir_node *cell, *load, *store, *c; -@@ -2067,6 +2223,53 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - return true; - } - -+/* For some reason, for matrices, values from default value initializers end up in different -+ * components than from regular initializers. Default value initializers fill the matrix in -+ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -+ * (top-to-bottom left-to-right), so they have to be adjusted. */ -+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, -+ struct hlsl_type *type, unsigned int index) -+{ -+ unsigned int element_comp_count, element, x, y, i; -+ unsigned int base = 0; -+ -+ if (ctx->profile->major_version < 4) -+ return index; -+ -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ return index; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_MATRIX: -+ x = index / type->dimy; -+ y = index % type->dimy; -+ return y * type->dimx + x; -+ -+ case HLSL_CLASS_ARRAY: -+ element_comp_count = hlsl_type_component_count(type->e.array.type); -+ element = index / element_comp_count; -+ base = element * element_comp_count; -+ return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ struct hlsl_type *field_type = type->e.record.fields[i].type; -+ -+ element_comp_count = hlsl_type_component_count(field_type); -+ if (index - base < element_comp_count) -+ return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); -+ base += element_comp_count; -+ } -+ break; -+ -+ default: -+ return index; -+ } -+ vkd3d_unreachable(); -+} -+ - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) - { -@@ -2087,12 +2290,29 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -- return; -+ if (dst->default_values) -+ { -+ struct hlsl_default_value default_value = {0}; -+ unsigned int dst_index; - -- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -- return; -- hlsl_block_add_block(instrs, &block); -+ if (!hlsl_clone_block(ctx, &block, instrs)) -+ return; -+ default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); -+ -+ dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ dst->default_values[dst_index] = default_value; -+ -+ hlsl_block_cleanup(&block); -+ } -+ else -+ { -+ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -+ return; -+ -+ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -+ return; -+ hlsl_block_add_block(instrs, &block); 
-+ } - - ++*store_index; - } -@@ -2171,6 +2391,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - struct hlsl_semantic new_semantic; - uint32_t modifiers = v->modifiers; - bool unbounded_res_array = false; -+ bool constant_buffer = false; - struct hlsl_ir_var *var; - struct hlsl_type *type; - bool local = true; -@@ -2190,6 +2411,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); - } - -+ if (type->class == HLSL_CLASS_CONSTANT_BUFFER) -+ { -+ type = type->e.resource.format; -+ constant_buffer = true; -+ } -+ - if (unbounded_res_array) - { - if (v->arrays.count == 1) -@@ -2246,17 +2473,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - } - } - -+ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -+ { -+ /* SM 5.1/6.x descriptor arrays act differently from previous versions. -+ * Not only are they treated as a single object in reflection, but they -+ * act as a single component for the purposes of assignment and -+ * initialization. */ -+ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); -+ } -+ - if (!(var_name = vkd3d_strdup(v->name))) - return; - -- new_semantic = v->semantic; -- if (v->semantic.name) -+ if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) - { -- if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) -- { -- vkd3d_free(var_name); -- return; -- } -+ vkd3d_free(var_name); -+ return; - } - - if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) -@@ -2266,7 +2498,16 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - return; - } - -- var->buffer = ctx->cur_buffer; -+ if (constant_buffer && ctx->cur_scope == ctx->globals) -+ { -+ if (!(var_name = vkd3d_strdup(v->name))) -+ return; -+ var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); -+ } -+ else -+ { -+ var->buffer = ctx->cur_buffer; -+ } - - if (var->buffer == ctx->globals_buffer) - { -@@ -2289,8 +2530,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ { - check_invalid_object_fields(ctx, var); -+ } - - if ((func = hlsl_get_first_func_decl(ctx, var->name))) - { -@@ -2348,6 +2592,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - { - struct parse_variable_def *v, *v_next; - struct hlsl_block *initializers; -+ unsigned int component_count; - struct hlsl_ir_var *var; - struct hlsl_type *type; - -@@ -2371,6 +2616,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - } - - type = var->data_type; -+ component_count = hlsl_type_component_count(type); - - var->state_blocks = v->state_blocks; - var->state_block_count = v->state_block_count; -@@ -2379,51 +2625,78 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - v->state_block_capacity = 0; - v->state_blocks = NULL; - -- if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) -+ if (var->state_blocks && 
component_count != var->state_block_count) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u state blocks, but got %u.", -- hlsl_type_component_count(type), var->state_block_count); -+ "Expected %u state blocks, but got %u.", component_count, var->state_block_count); - free_parse_variable_def(v); - continue; - } - - if (v->initializer.args_count) - { -- if (v->initializer.braces) -+ unsigned int store_index = 0; -+ bool is_default_values_initializer; -+ unsigned int size, k; -+ -+ is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) -+ || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ || ctx->cur_scope->annotations; -+ -+ if (is_default_values_initializer) - { -- unsigned int size = initializer_size(&v->initializer); -- unsigned int store_index = 0; -- unsigned int k; -+ /* Default values might have been allocated already for another variable of the same name, -+ in the same scope. */ -+ if (var->default_values) -+ { -+ free_parse_variable_def(v); -+ continue; -+ } - -- if (hlsl_type_component_count(type) != size) -+ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) - { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in initializer, but got %u.", -- hlsl_type_component_count(type), size); - free_parse_variable_def(v); - continue; - } -+ } - -- for (k = 0; k < v->initializer.args_count; ++k) -+ if (!v->initializer.braces) -+ { -+ if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) - { -- initialize_var_components(ctx, v->initializer.instrs, var, -- &store_index, v->initializer.args[k]); -+ free_parse_variable_def(v); -+ continue; - } -+ -+ v->initializer.args[0] = node_from_block(v->initializer.instrs); - } -- else -+ -+ size = initializer_size(&v->initializer); -+ if (component_count != size) - { -- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", component_count, size); -+ free_parse_variable_def(v); -+ continue; -+ } - -- assert(v->initializer.args_count == 1); -- hlsl_block_add_instr(v->initializer.instrs, &load->node); -- add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); -+ for (k = 0; k < v->initializer.args_count; ++k) -+ { -+ initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); - } - -- if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ if (is_default_values_initializer) -+ { -+ hlsl_dump_var_default_values(var); -+ } -+ else if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ { - hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); -+ } - else -+ { - hlsl_block_add_block(initializers, v->initializer.instrs); -+ } - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -3353,6 +3626,34 @@ static bool intrinsic_exp2(struct hlsl_ctx *ctx, - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); - } - -+static bool intrinsic_faceforward(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s faceforward(%s n, %s i, %s ng)\n" -+ "{\n" -+ " return dot(i, ng) < 0 ? 
n : -n;\n" -+ "}\n"; -+ -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ return false; -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "faceforward", body); -+ vkd3d_free(body); -+ if (!func) -+ return false; -+ -+ return add_user_call(ctx, func, params, loc); -+} -+ - static bool intrinsic_floor(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4032,6 +4333,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, - const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) - { -+ unsigned int sampler_dim = hlsl_sampler_dim_count(dim); - struct hlsl_resource_load_params load_params = { 0 }; - const struct hlsl_type *sampler_type; - struct hlsl_ir_node *coords, *sample; -@@ -4043,11 +4345,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - } - -- if (params->args_count == 4) -- { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); -- } -- - sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_SAMPLER - || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) -@@ -4061,18 +4358,22 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - hlsl_release_string_buffer(ctx, string); - } - -- if (!strcmp(name, "tex2Dlod")) -+ if (!strcmp(name, "tex2Dbias") -+ || !strcmp(name, "tex2Dlod")) - { - struct hlsl_ir_node *lod, *c; - -- load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ if (!strcmp(name, "tex2Dlod")) -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ else -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - -- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) -+ if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) - return false; - hlsl_block_add_instr(params->instrs, c); - -- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, -- hlsl_sampler_dim_count(dim)), loc))) -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, c, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - { - return false; - } -@@ -4099,14 +4400,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (hlsl_version_ge(ctx, 4, 0)) - { -- unsigned int count = hlsl_sampler_dim_count(dim); - struct hlsl_ir_node *divisor; - -- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) -+ if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) - return false; - hlsl_block_add_instr(params->instrs, divisor); - -- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) -+ if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) - return false; - hlsl_block_add_instr(params->instrs, coords); - -@@ -4120,12 +4420,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; - } - } -+ else if (params->args_count == 4) /* Gradient sampling. 
*/ -+ { -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; -+ } - else - { - load_params.type = HLSL_RESOURCE_SAMPLE; - - if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - { - return false; - } -@@ -4181,12 +4503,30 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); - } - -+static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); -+} -+ - static bool intrinsic_tex2D(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); - } - -+static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); -+} -+ -+static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); -+} -+ - static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4205,6 +4545,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); - } - -+static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); -+} -+ - static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4217,6 +4563,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); - } - -+static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); -+} -+ - static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4336,6 +4688,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - return true; - } - -+static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *expr; -+ -+ if (!(expr = 
hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, -+ operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, expr); -+ -+ return true; -+} -+ - static const struct intrinsic_function - { - const char *name; -@@ -4348,6 +4714,7 @@ intrinsic_functions[] = - { - /* Note: these entries should be kept in alphabetical order. */ - {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, -+ {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, - {"abs", 1, true, intrinsic_abs}, - {"acos", 1, true, intrinsic_acos}, - {"all", 1, true, intrinsic_all}, -@@ -4375,6 +4742,7 @@ intrinsic_functions[] = - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, -+ {"faceforward", 3, true, intrinsic_faceforward}, - {"floor", 1, true, intrinsic_floor}, - {"fmod", 2, true, intrinsic_fmod}, - {"frac", 1, true, intrinsic_frac}, -@@ -4406,12 +4774,17 @@ intrinsic_functions[] = - {"tan", 1, true, intrinsic_tan}, - {"tanh", 1, true, intrinsic_tanh}, - {"tex1D", -1, false, intrinsic_tex1D}, -+ {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, - {"tex2D", -1, false, intrinsic_tex2D}, -+ {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, -+ {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, - {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, - {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, - {"tex3D", -1, false, intrinsic_tex3D}, -+ {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, - {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, - {"texCUBE", -1, false, intrinsic_texCUBE}, -+ {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, - {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, - {"transpose", 1, true, intrinsic_transpose}, - {"trunc", 1, true, intrinsic_trunc}, -@@ -5481,6 +5854,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_BREAK - %token KW_BUFFER - %token KW_CASE -+%token KW_CONSTANTBUFFER - %token KW_CBUFFER - %token KW_CENTROID - %token KW_COLUMN_MAJOR -@@ -5566,6 +5940,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_TEXTURECUBEARRAY - %token KW_TRUE - %token KW_TYPEDEF -+%token KW_UNSIGNED - %token KW_UNIFORM - %token KW_VECTOR - %token KW_VERTEXSHADER -@@ -5670,6 +6045,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - - %type if_body - -+%type array -+ - %type var_modifiers - - %type any_identifier -@@ -5717,8 +6094,7 @@ hlsl_prog: - | hlsl_prog buffer_declaration buffer_body - | hlsl_prog declaration_statement - { -- if (!list_empty(&$2->instrs)) -- hlsl_fixme(ctx, &@2, "Uniform initializer."); -+ hlsl_block_add_block(&ctx->static_initializers, $2); - destroy_block($2); - } - | hlsl_prog preproc_directive -@@ -5742,19 +6118,31 @@ pass: - - annotations_list: - variables_def_typed ';' -+ { -+ struct hlsl_block *block; -+ -+ block = initialize_vars(ctx, $1); -+ destroy_block(block); -+ } - | annotations_list variables_def_typed ';' -+ { -+ struct hlsl_block *block; -+ -+ block = initialize_vars(ctx, $2); -+ destroy_block(block); -+ } - - annotations_opt: - %empty - { - $$ = NULL; - } -- | '<' scope_start '>' -+ | '<' annotations_scope_start '>' - { - hlsl_pop_scope(ctx); - $$ = NULL; - } -- | '<' scope_start annotations_list '>' -+ | '<' annotations_scope_start annotations_list '>' - { - struct hlsl_scope *scope = ctx->cur_scope; - -@@ -6282,6 +6670,13 @@ switch_scope_start: - ctx->cur_scope->_switch = true; - } - -+annotations_scope_start: -+ %empty -+ { -+ 
hlsl_push_scope(ctx); -+ ctx->cur_scope->annotations = true; -+ } -+ - var_identifier: - VAR_IDENTIFIER - | NEW_IDENTIFIER -@@ -6315,6 +6710,9 @@ semantic: - { - char *p; - -+ if (!($$.raw_name = hlsl_strdup(ctx, $2))) -+ YYABORT; -+ - for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) - ; - $$.name = $2; -@@ -6330,22 +6728,34 @@ register_reservation: - ':' KW_REGISTER '(' any_identifier ')' - { - memset(&$$, 0, sizeof($$)); -- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $4, 0, &$$)) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ -+ vkd3d_free($4); -+ } -+ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' -+ { -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) -+ { - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $4); -+ } - - vkd3d_free($4); -+ vkd3d_free($6); - } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' - { - memset(&$$, 0, sizeof($$)); -- if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ if (parse_reservation_index(ctx, $6, 0, &$$)) - { - hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - } - else if (parse_reservation_space($6, &$$.reg_space)) - { -- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $4, 0, &$$)) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $4); - } -@@ -6358,12 +6768,45 @@ register_reservation: - vkd3d_free($4); - vkd3d_free($6); - } -+ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' -+ { -+ memset(&$$, 0, sizeof($$)); -+ -+ if (!parse_reservation_space($9, &$$.reg_space)) -+ hlsl_error(ctx, &@9, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $9); -+ -+ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) -+ { -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ } -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($9); -+ } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ } -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($8); -+ } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' - { - hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - - memset(&$$, 0, sizeof($$)); -- if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $6, 0, &$$)) - hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $6); - -@@ -6375,6 +6818,26 @@ register_reservation: - vkd3d_free($6); - vkd3d_free($8); - } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) -+ { -+ 
hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ } -+ -+ if (!parse_reservation_space($11, &$$.reg_space)) -+ hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $11); -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($8); -+ vkd3d_free($11); -+ } - - packoffset_reservation: - ':' KW_PACKOFFSET '(' any_identifier ')' -@@ -6449,8 +6912,13 @@ parameter: - } - type = hlsl_new_array_type(ctx, type, $4.sizes[i]); - } -+ vkd3d_free($4.sizes); -+ - $$.type = type; - -+ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -+ hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); -+ - $$.name = $3; - $$.semantic = $5.semantic; - $$.reg_reservation = $5.reg_reservation; -@@ -6713,6 +7181,26 @@ type_no_void: - } - vkd3d_free($1); - } -+ | KW_UNSIGNED TYPE_IDENTIFIER -+ { -+ struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); -+ -+ if (hlsl_is_numeric_type(type) && type->e.numeric.type == HLSL_TYPE_INT) -+ { -+ if (!(type = hlsl_type_clone(ctx, type, 0, 0))) -+ YYABORT; -+ vkd3d_free((void *)type->name); -+ type->name = NULL; -+ type->e.numeric.type = HLSL_TYPE_UINT; -+ } -+ else -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "The 'unsigned' keyword can't be used with type %s.", $2); -+ } -+ -+ $$ = type; -+ } - | KW_STRUCT TYPE_IDENTIFIER - { - $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); -@@ -6724,6 +7212,10 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); - } -+ | KW_DEPTHSTENCILSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); -+ } - | KW_DEPTHSTENCILVIEW - { - $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); -@@ -6736,6 +7228,17 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); - } -+ | KW_CONSTANTBUFFER '<' type '>' -+ { -+ if ($3->class != HLSL_CLASS_STRUCT) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "ConstantBuffer<...> requires user-defined structure type."); -+ $$ = hlsl_new_cb_type(ctx, $3); -+ } -+ | KW_RASTERIZERSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); -+ } - - type: - type_no_void -@@ -6932,6 +7435,34 @@ state_block: - hlsl_src_from_node(&entry->args[i], $5.args[i]); - vkd3d_free($5.args); - -+ $$ = $1; -+ state_block_add_entry($$, entry); -+ } -+ | state_block any_identifier '(' func_arguments ')' ';' -+ { -+ struct hlsl_state_block_entry *entry; -+ unsigned int i; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ YYABORT; -+ -+ entry->is_function_call = true; -+ -+ entry->name = $2; -+ entry->lhs_has_index = false; -+ entry->lhs_index = 0; -+ -+ entry->instrs = $4.instrs; -+ -+ entry->args_count = $4.args_count; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ YYABORT; -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_from_node(&entry->args[i], $4.args[i]); -+ vkd3d_free($4.args); -+ -+ hlsl_validate_state_block_entry(ctx, entry, &@4); -+ - $$ = $1; - state_block_add_entry($$, entry); - } -@@ -7020,52 +7551,43 @@ variable_def_typed: - $$->modifiers_loc = @1; - } - --arrays: -- %empty -+array: -+ '[' ']' - { -- $$.sizes = NULL; -- $$.count = 0; -+ $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; - } -- | '[' expr ']' arrays -+ | '[' expr ']' - { -- uint32_t *new_array; -- unsigned int size; -- -- size = 
evaluate_static_expression_as_uint(ctx, $2, &@2); -- -- destroy_block($2); -+ $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); - -- $$ = $4; -- -- if (!size) -+ if (!$$) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, - "Array size is not a positive integer constant."); -- vkd3d_free($$.sizes); - YYABORT; - } - -- if (size > 65536) -+ if ($$ > 65536) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, -- "Array size %u is not between 1 and 65536.", size); -- vkd3d_free($$.sizes); -+ "Array size %u is not between 1 and 65536.", $$); - YYABORT; - } - -- if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) -- { -- vkd3d_free($$.sizes); -- YYABORT; -- } -- $$.sizes = new_array; -- $$.sizes[$$.count++] = size; -+ destroy_block($2); - } -- | '[' ']' arrays -+ -+arrays: -+ %empty -+ { -+ $$.sizes = NULL; -+ $$.count = 0; -+ } -+ | array arrays - { - uint32_t *new_array; - -- $$ = $3; -+ $$ = $2; - - if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) - { -@@ -7074,7 +7596,7 @@ arrays: - } - - $$.sizes = new_array; -- $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; -+ $$.sizes[$$.count++] = $1; - } - - var_modifiers: -@@ -7156,6 +7678,8 @@ var_modifiers: - } - | var_identifier var_modifiers - { -+ $$ = $2; -+ - if (!strcmp($1, "precise")) - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); - else if (!strcmp($1, "single")) -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index bdb72a1fab9..7e4f168675e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -218,6 +218,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, - uniform->is_uniform = 1; - uniform->is_param = temp->is_param; - uniform->buffer = temp->buffer; -+ if (temp->default_values) -+ { -+ /* Transfer default values from the temp to the uniform. */ -+ assert(!uniform->default_values); -+ assert(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); -+ uniform->default_values = temp->default_values; -+ temp->default_values = NULL; -+ } - - if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) - return; -@@ -312,7 +320,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - } - } - -- if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) -+ if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) - { - vkd3d_free(new_name); - return NULL; -@@ -1623,9 +1631,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: -@@ -1635,6 +1645,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_ARRAY: - case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_CONSTANT_BUFFER: - /* FIXME: Actually we shouldn't even get here, but we don't split - * matrices yet. 
*/ - return false; -@@ -1970,6 +1981,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return progress; - } - -+enum validation_result -+{ -+ DEREF_VALIDATION_OK, -+ DEREF_VALIDATION_OUT_OF_BOUNDS, -+ DEREF_VALIDATION_NOT_CONSTANT, -+}; -+ -+static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, -+ const struct hlsl_deref *deref) -+{ -+ struct hlsl_type *type = deref->var->data_type; -+ unsigned int i; -+ -+ for (i = 0; i < deref->path_len; ++i) -+ { -+ struct hlsl_ir_node *path_node = deref->path[i].node; -+ unsigned int idx = 0; -+ -+ assert(path_node); -+ if (path_node->type != HLSL_IR_CONSTANT) -+ return DEREF_VALIDATION_NOT_CONSTANT; -+ -+ /* We should always have generated a cast to UINT. */ -+ assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ if (idx >= type->dimx) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Vector index is out of bounds. %u/%u", idx, type->dimx); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_MATRIX: -+ if (idx >= hlsl_type_major_size(type)) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); -+ } -+ -+ return DEREF_VALIDATION_OK; -+} -+ - static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - const char *usage) - { -@@ -1987,60 +2068,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct - } - } - --static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - void *context) - { -- unsigned int start, count; -- -- if (instr->type == HLSL_IR_RESOURCE_LOAD) -+ switch (instr->type) - { -- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -- -- if (!load->resource.var->is_uniform) -+ case HLSL_IR_RESOURCE_LOAD: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Loaded resource must have a single uniform source."); -+ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -+ -+ if (!load->resource.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Loaded resource must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Loaded resource from \"%s\" must be determinable at compile time.", -+ load->resource.var->name); -+ note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); -+ } -+ -+ if (load->sampler.var) -+ 
{ -+ if (!load->sampler.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Resource load sampler must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Resource load sampler from \"%s\" must be determinable at compile time.", -+ load->sampler.var->name); -+ note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); -+ } -+ } -+ break; - } -- else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) -+ case HLSL_IR_RESOURCE_STORE: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Loaded resource from \"%s\" must be determinable at compile time.", -- load->resource.var->name); -- note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); -- } -+ struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - -- if (load->sampler.var) -- { -- if (!load->sampler.var->is_uniform) -+ if (!store->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Resource load sampler must have a single uniform source."); -+ "Accessed resource must have a single uniform source."); - } -- else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) -+ else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Resource load sampler from \"%s\" must be determinable at compile time.", -- load->sampler.var->name); -- note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); -+ "Accessed resource from \"%s\" must be determinable at compile time.", -+ store->resource.var->name); -+ note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); - } -+ break; - } -- } -- else if (instr->type == HLSL_IR_RESOURCE_STORE) -- { -- struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); -- -- if (!store->resource.var->is_uniform) -+ case HLSL_IR_LOAD: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Accessed resource must have a single uniform source."); -+ struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ validate_component_index_range_from_deref(ctx, &load->src); -+ break; - } -- else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) -+ case HLSL_IR_STORE: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Accessed resource from \"%s\" must be determinable at compile time.", -- store->resource.var->name); -- note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); -+ struct hlsl_ir_store *store = hlsl_ir_store(instr); -+ validate_component_index_range_from_deref(ctx, &store->lhs); -+ break; - } -+ default: -+ break; - } - - return false; -@@ -2554,11 +2652,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - case HLSL_RESOURCE_RESINFO: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: -- case HLSL_RESOURCE_SAMPLE_GRAD: - case HLSL_RESOURCE_SAMPLE_INFO: - return false; - - case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_GRAD: - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case 
HLSL_RESOURCE_SAMPLE_PROJ: -@@ -3815,15 +3913,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -+ const struct hlsl_reg_reservation *reservation = &var->reg_reservation; - unsigned int r; - -- if (var->reg_reservation.reg_type) -+ if (reservation->reg_type) - { - for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) - { - if (var->regs[r].allocation_size > 0) - { -- if (var->reg_reservation.reg_type != get_regset_name(r)) -+ if (reservation->reg_type != get_regset_name(r)) - { - struct vkd3d_string_buffer *type_string; - -@@ -3839,10 +3938,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - else - { - var->regs[r].allocated = true; -- var->regs[r].id = var->reg_reservation.reg_index; -- TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index + var->regs[r].allocation_size); -+ var->regs[r].space = reservation->reg_space; -+ var->regs[r].index = reservation->reg_index; - } - } - } -@@ -4181,8 +4278,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - { - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; - -+ /* FIXME: We could potentially pack structs or arrays more efficiently... */ -+ - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); - else - return allocate_range(ctx, allocator, first_write, last_read, reg_size); - } -@@ -4589,6 +4688,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) - - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ struct register_allocator allocator_used = {0}; - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -@@ -4597,6 +4697,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; - - if (!var->is_uniform || reg_size == 0) - continue; -@@ -4609,12 +4710,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - assert(reg_size % 4 == 0); - for (i = 0; i < reg_size / 4; ++i) - { -- if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ if (i < bind_count) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -- "Overlapping register() reservations on 'c%u'.", reg_idx + i); -+ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Overlapping register() reservations on 'c%u'.", reg_idx + i); -+ } -+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); - } -- - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); - } - -@@ -4627,6 +4731,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - } - } - -+ vkd3d_free(allocator_used.allocations); -+ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - 
unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; -@@ -4697,7 +4803,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - if (ctx->profile->major_version < 4) - { -- D3DSHADER_PARAM_REGISTER_TYPE sm1_type; -+ struct vkd3d_shader_version version; - D3DDECLUSAGE usage; - uint32_t usage_idx; - -@@ -4705,8 +4811,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - return; - -- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, ®); -- if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ builtin = hlsl_sm1_register_from_semantic(&version, -+ var->semantic.name, var->semantic.index, output, &type, ®); -+ if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -4715,7 +4825,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - if ((!output && !var->last_read) || (output && !var->first_write)) - return; -- type = (enum vkd3d_shader_register_type)sm1_type; - } - else - { -@@ -4762,13 +4871,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) - } - } - --static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) -+static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) - { - const struct hlsl_buffer *buffer; - - LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) - { -- if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) -+ if (buffer->reservation.reg_type == 'b' -+ && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) - return buffer; - } - return NULL; -@@ -4783,6 +4893,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - if (register_reservation) - { - var->buffer_offset = 4 * var->reg_reservation.reg_index; -+ var->has_explicit_bind_point = 1; - } - else - { -@@ -4815,6 +4926,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - } - } - var->buffer_offset = var->reg_reservation.offset_index; -+ var->has_explicit_bind_point = 1; - } - else - { -@@ -4913,11 +5025,19 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) - } - } - -+static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) -+{ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ return UINT_MAX; -+ -+ return 13; -+} -+ - static void allocate_buffers(struct hlsl_ctx *ctx) - { - struct hlsl_buffer *buffer; -+ uint32_t index = 0, id = 0; - struct hlsl_ir_var *var; -- uint32_t index = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -4938,32 +5058,59 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - if (buffer->type == HLSL_BUFFER_CONSTANT) - { -- if (buffer->reservation.reg_type == 'b') -+ const struct hlsl_reg_reservation *reservation = &buffer->reservation; -+ -+ if (reservation->reg_type == 'b') - { -- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); -+ const struct 
hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, -+ reservation->reg_space, reservation->reg_index); -+ unsigned int max_index = get_max_cbuffer_reg_index(ctx); -+ -+ if (buffer->reservation.reg_index > max_index) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Buffer reservation cb%u exceeds target's maximum (cb%u).", -+ buffer->reservation.reg_index, max_index); - - if (reserved_buffer && reserved_buffer != buffer) - { - hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); -+ "Multiple buffers bound to space %u, index %u.", -+ reservation->reg_space, reservation->reg_index); - hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, -- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); -+ "Buffer %s is already bound to space %u, index %u.", -+ reserved_buffer->name, reservation->reg_space, reservation->reg_index); - } - -- buffer->reg.id = buffer->reservation.reg_index; -+ buffer->reg.space = reservation->reg_space; -+ buffer->reg.index = reservation->reg_index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ buffer->reg.id = id++; -+ else -+ buffer->reg.id = buffer->reg.index; - buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; -- TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); -+ TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", -+ buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); - } -- else if (!buffer->reservation.reg_type) -+ else if (!reservation->reg_type) - { -- while (get_reserved_buffer(ctx, index)) -+ unsigned int max_index = get_max_cbuffer_reg_index(ctx); -+ while (get_reserved_buffer(ctx, 0, index)) - ++index; - -- buffer->reg.id = index; -+ if (index > max_index) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Too many buffers allocated, target's maximum is %u.", max_index); -+ -+ buffer->reg.space = 0; -+ buffer->reg.index = index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ buffer->reg.id = id++; -+ else -+ buffer->reg.id = buffer->reg.index; - buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; -- TRACE("Allocated %s to cb%u.\n", buffer->name, index); -+ TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); - ++index; - } - else -@@ -4980,7 +5127,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, -- uint32_t index, bool allocated_only) -+ uint32_t space, uint32_t index, bool allocated_only) - { - const struct hlsl_ir_var *var; - unsigned int start, count; -@@ -4995,12 +5142,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - start = var->reg_reservation.reg_index; - count = var->data_type->reg_size[regset]; - -+ if (var->reg_reservation.reg_space != space) -+ continue; -+ - if (!var->regs[regset].allocated && allocated_only) - continue; - } - else if (var->regs[regset].allocated) - { -- start = var->regs[regset].id; -+ if (var->regs[regset].space != space) -+ continue; -+ -+ start = var->regs[regset].index; - count = var->regs[regset].allocation_size; - } - else -@@ -5017,8 +5170,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - { - char regset_name = get_regset_name(regset); -+ 
uint32_t min_index = 0, id = 0; - struct hlsl_ir_var *var; -- uint32_t min_index = 0; - - if (regset == HLSL_REGSET_UAVS) - { -@@ -5041,35 +5194,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - if (var->regs[regset].allocated) - { - const struct hlsl_ir_var *reserved_object, *last_reported = NULL; -- unsigned int index, i; -+ unsigned int i; - -- if (var->regs[regset].id < min_index) -+ if (var->regs[regset].index < min_index) - { - assert(regset == HLSL_REGSET_UAVS); - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "UAV index (%u) must be higher than the maximum render target index (%u).", -- var->regs[regset].id, min_index - 1); -+ var->regs[regset].index, min_index - 1); - continue; - } - - for (i = 0; i < count; ++i) - { -- index = var->regs[regset].id + i; -+ unsigned int space = var->regs[regset].space; -+ unsigned int index = var->regs[regset].index + i; - - /* get_allocated_object() may return "var" itself, but we - * actually want that, otherwise we'll end up reporting the - * same conflict between the same two variables twice. */ -- reserved_object = get_allocated_object(ctx, regset, index, true); -+ reserved_object = get_allocated_object(ctx, regset, space, index, true); - if (reserved_object && reserved_object != var && reserved_object != last_reported) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple variables bound to %c%u.", regset_name, index); -+ "Multiple variables bound to space %u, %c%u.", regset_name, space, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, -- "Variable '%s' is already bound to %c%u.", reserved_object->name, -- regset_name, index); -+ "Variable '%s' is already bound to space %u, %c%u.", -+ reserved_object->name, regset_name, space, index); - last_reported = reserved_object; - } - } -+ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ var->regs[regset].id = id++; -+ else -+ var->regs[regset].id = var->regs[regset].index; -+ TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", -+ var->name, var->regs[regset].space, regset_name, var->regs[regset].index, -+ regset_name, var->regs[regset].index + count, var->regs[regset].id); - } - else - { -@@ -5078,7 +5240,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - while (available < count) - { -- if (get_allocated_object(ctx, regset, index, false)) -+ if (get_allocated_object(ctx, regset, 0, index, false)) - available = 0; - else - ++available; -@@ -5086,10 +5248,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - } - index -= count; - -- var->regs[regset].id = index; -+ var->regs[regset].space = 0; -+ var->regs[regset].index = index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ var->regs[regset].id = id++; -+ else -+ var->regs[regset].id = var->regs[regset].index; - var->regs[regset].allocated = true; -- TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, -- index + count); -+ TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, -+ regset_name, index, regset_name, index + count, var->regs[regset].id); - ++index; - } - } -@@ -5123,21 +5290,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - { - case HLSL_CLASS_VECTOR: - if (idx >= type->dimx) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Vector index is out of bounds. 
%u/%u", idx, type->dimx); - return false; -- } - *start += idx; - break; - - case HLSL_CLASS_MATRIX: - if (idx >= hlsl_type_major_size(type)) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); - return false; -- } - if (hlsl_type_is_row_major(type)) - *start += idx * type->dimx; - else -@@ -5146,11 +5305,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - - case HLSL_CLASS_ARRAY: - if (idx >= type->e.array.elements_count) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); - return false; -- } - *start += idx * hlsl_type_component_count(type->e.array.type); - break; - -@@ -5295,6 +5450,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - assert(deref->data_type); - assert(hlsl_is_numeric_type(deref->data_type)); - -+ ret.index += offset / 4; - ret.id += offset / 4; - - ret.writemask = 0xf & (0xf << (offset % 4)); -@@ -5446,6 +5602,330 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - } while (progress); - } - -+static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, -+ struct vsir_program *program, bool output, struct hlsl_ir_var *var) -+{ -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ enum vkd3d_shader_register_type type; -+ struct shader_signature *signature; -+ struct signature_element *element; -+ unsigned int register_index, mask; -+ -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ -+ if (output) -+ signature = &program->output_signature; -+ else -+ signature = &program->input_signature; -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ signature->element_count + 1, sizeof(*signature->elements))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element = &signature->elements[signature->element_count++]; -+ -+ if (!hlsl_sm1_register_from_semantic(&program->shader_version, -+ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -+ { -+ unsigned int usage, usage_idx; -+ bool ret; -+ -+ register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ -+ ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -+ assert(ret); -+ /* With the exception of vertex POSITION output, none of these are -+ * system values. Pixel POSITION input is not equivalent to -+ * SV_Position; the closer equivalent is VPOS, which is not declared -+ * as a semantic. 
*/ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ } -+ mask = (1 << var->data_type->dimx) - 1; -+ -+ memset(element, 0, sizeof(*element)); -+ if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) -+ { -+ --signature->element_count; -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element->semantic_index = var->semantic.index; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ element->target_location = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = mask; -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -+ element->interpolation_mode = VKD3DSIM_LINEAR; -+} -+ -+static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) -+{ -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_input_semantic) -+ sm1_generate_vsir_signature_entry(ctx, program, false, var); -+ if (var->is_output_semantic) -+ sm1_generate_vsir_signature_entry(ctx, program, true, var); -+ } -+} -+ -+/* OBJECTIVE: Translate all the information from ctx and entry_func to the -+ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -+ * without relying on ctx and entry_func. */ -+static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+{ -+ struct vkd3d_shader_version version = {0}; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ if (!vsir_program_init(program, &version, 0)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ write_sm1_uniforms(ctx, &buffer); -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ ctab->code = buffer.data; -+ ctab->size = buffer.size; -+ -+ sm1_generate_vsir_signature(ctx, program); -+} -+ -+static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -+ struct hlsl_block **found_block) -+{ -+ struct hlsl_ir_node *node; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (node == stop_point) -+ return NULL; -+ -+ if (node->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(node); -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -+ return jump; -+ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -+ return jump; -+ } -+ else if (node->type == HLSL_IR_JUMP) -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); -+ -+ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) -+ { -+ *found_block = block; -+ return jump; -+ } -+ } -+ } -+ -+ return NULL; -+} -+ -+static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -+{ -+ /* Always use the explicit limit if it has been passed. */ -+ if (loop->unroll_limit) -+ return loop->unroll_limit; -+ -+ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. 
*/ -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ return 1024; -+ -+ /* SM4 limits implicit unrolling to 254 iterations. */ -+ if (hlsl_version_ge(ctx, 4, 0)) -+ return 254; -+ -+ /* SM<3 implicitly unrolls up to 1024 iterations. */ -+ return 1024; -+} -+ -+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) -+{ -+ unsigned int max_iterations, i; -+ -+ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); -+ -+ for (i = 0; i < max_iterations; ++i) -+ { -+ struct hlsl_block tmp_dst, *jump_block; -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) -+ return false; -+ list_move_before(&loop->node.entry, &tmp_dst.instrs); -+ hlsl_block_cleanup(&tmp_dst); -+ -+ hlsl_run_const_passes(ctx, block); -+ -+ if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) -+ { -+ enum hlsl_ir_jump_type type = jump->type; -+ -+ if (jump_block != loop_parent) -+ { -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -+ "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); -+ return false; -+ } -+ -+ list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); -+ hlsl_block_cleanup(&tmp_dst); -+ -+ if (type == HLSL_IR_JUMP_BREAK) -+ break; -+ } -+ } -+ -+ /* Native will not emit an error if max_iterations has been reached with an -+ * explicit limit. It also will not insert a loop if there are iterations left -+ * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ -+ if (!loop->unroll_limit && i == max_iterations) -+ { -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -+ "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); -+ return false; -+ } -+ -+ list_remove(&loop->node.entry); -+ hlsl_free_instr(&loop->node); -+ -+ return true; -+} -+ -+/* -+ * loop_unrolling_find_unrollable_loop() is not the normal way to do things; -+ * normal passes simply iterate over the whole block and apply a transformation -+ * to every relevant instruction. However, loop unrolling can fail, and we want -+ * to leave the loop in its previous state in that case. That isn't a problem by -+ * itself, except that loop unrolling needs copy-prop in order to work properly, -+ * and copy-prop state at the time of the loop depends on the rest of the program -+ * up to that point. This means we need to clone the whole program, and at that -+ * point we have to search it again anyway to find the clone of the loop we were -+ * going to unroll. -+ * -+ * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop -+ * up until the loop instruction, clone just that loop, then use copyprop again -+ * with the saved state after unrolling. However, copyprop currently isn't built -+ * for that yet [notably, it still relies on indices]. Note also this still doesn't -+ * really let us use transform_ir() anyway [since we don't have a good way to say -+ * "copyprop from the beginning of the program up to the instruction we're -+ * currently processing" from the callback]; we'd have to use a dedicated -+ * recursive function instead. 
*/ -+static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block **containing_block) -+{ -+ struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (instr->type) -+ { -+ case HLSL_IR_LOOP: -+ { -+ struct hlsl_ir_loop *nested_loop; -+ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -+ -+ if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) -+ return nested_loop; -+ -+ if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ { -+ *containing_block = block; -+ return loop; -+ } -+ -+ break; -+ } -+ case HLSL_IR_IF: -+ { -+ struct hlsl_ir_loop *loop; -+ struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) -+ return loop; -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) -+ return loop; -+ -+ break; -+ } -+ case HLSL_IR_SWITCH: -+ { -+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); -+ struct hlsl_ir_switch_case *c; -+ struct hlsl_ir_loop *loop; -+ -+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -+ { -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) -+ return loop; -+ } -+ -+ break; -+ } -+ default: -+ break; -+ } -+ } -+ -+ return NULL; -+} -+ -+static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) -+{ -+ while (true) -+ { -+ struct hlsl_block clone, *containing_block; -+ struct hlsl_ir_loop *loop, *cloned_loop; -+ -+ if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) -+ return; -+ -+ if (!hlsl_clone_block(ctx, &clone, block)) -+ return; -+ -+ cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); -+ assert(cloned_loop); -+ -+ if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) -+ { -+ hlsl_block_cleanup(&clone); -+ loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; -+ continue; -+ } -+ -+ hlsl_block_cleanup(block); -+ hlsl_block_init(block); -+ hlsl_block_add_block(block, &clone); -+ } -+} -+ - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -5532,6 +6012,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } - -+ transform_unroll_loops(ctx, body); - hlsl_run_const_passes(ctx, body); - - remove_unreachable_code(ctx, body); -@@ -5541,7 +6022,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_casts_to_bool, body); - lower_ir(ctx, lower_int_dot, body); - -- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, validate_dereferences, body, NULL); - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -@@ -5628,7 +6109,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - switch (target_type) - { - case VKD3D_SHADER_TARGET_D3D_BYTECODE: -- return hlsl_sm1_write(ctx, entry_func, out); -+ { -+ uint32_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vkd3d_shader_code ctab = {0}; -+ struct vsir_program program; -+ int 
result; -+ -+ sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); -+ if (ctx->result) -+ { -+ vsir_program_cleanup(&program); -+ vkd3d_shader_free_shader_code(&ctab); -+ return ctx->result; -+ } -+ -+ result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); -+ vsir_program_cleanup(&program); -+ vkd3d_shader_free_shader_code(&ctab); -+ return result; -+ } - - case VKD3D_SHADER_TARGET_DXBC_TPF: - return hlsl_sm4_write(ctx, entry_func, out); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b3b745fc1b2..e5432cb35ce 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -46,9 +46,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade - - static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) -- || handler_idx == VKD3DSIH_HS_DECLS; -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) -+ || opcode == VKD3DSIH_HS_DECLS; - } - - static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) -@@ -60,9 +60,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i - - static bool vsir_instruction_init_with_params(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) -+ enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) - { -- vsir_instruction_init(ins, location, handler_idx); -+ vsir_instruction_init(ins, location, opcode); - ins->dst_count = dst_count; - ins->src_count = src_count; - -@@ -287,7 +287,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - mul_ins = &instructions->elements[pos]; - add_ins = &instructions->elements[pos + 1]; - -- mul_ins->handler_idx = VKD3DSIH_MUL; -+ mul_ins->opcode = VKD3DSIH_MUL; - mul_ins->src_count = 2; - - if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) -@@ -322,7 +322,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - { - struct vkd3d_shader_instruction *ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_IFC: - if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) -@@ -492,26 +492,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - struct shader_phase_location *loc; - bool b; - -- if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -+ if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) - { - b = flattener_is_in_fork_or_join_phase(normaliser); - /* Reset the phase info. */ - normaliser->phase_body_idx = ~0u; -- normaliser->phase = ins->handler_idx; -+ normaliser->phase = ins->opcode; - normaliser->instance_count = 1; - /* Leave the first occurrence and delete the rest. 
*/ - if (b) - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -- || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) -+ else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -+ || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) - { - normaliser->instance_count = ins->declaration.count + !ins->declaration.count; - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( -+ else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( - &ins->declaration.dst.reg)) - { - vkd3d_shader_instruction_make_nop(ins); -@@ -524,7 +524,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - if (normaliser->phase_body_idx == ~0u) - normaliser->phase_body_idx = index; - -- if (ins->handler_idx == VKD3DSIH_RET) -+ if (ins->opcode == VKD3DSIH_RET) - { - normaliser->last_ret_location = ins->location; - vkd3d_shader_instruction_make_nop(ins); -@@ -679,11 +679,11 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 - } - - void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx) -+ enum vkd3d_shader_opcode opcode) - { - memset(ins, 0, sizeof(*ins)); - ins->location = *location; -- ins->handler_idx = handler_idx; -+ ins->opcode = opcode; - } - - static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, -@@ -865,12 +865,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - { - ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser.phase = ins->handler_idx; -+ normaliser.phase = ins->opcode; - break; - default: - if (vsir_instruction_is_dcl(ins)) -@@ -888,7 +888,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - { - ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - input_control_point_count = ins->declaration.count; -@@ -1526,7 +1526,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - struct vkd3d_shader_register *reg; - unsigned int i; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT: - if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -@@ -1560,7 +1560,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser->phase = ins->handler_idx; -+ normaliser->phase = ins->opcode; - memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); - memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); - memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); -@@ -1594,7 +1594,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - { - ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - normaliser.output_control_point_count = ins->declaration.count; -@@ -1608,7 +1608,7 @@ static enum vkd3d_result 
vsir_program_normalise_io_registers(struct vsir_program - /* fall through */ - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser.phase = ins->handler_idx; -+ normaliser.phase = ins->opcode; - break; - default: - break; -@@ -1740,7 +1740,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) -+ if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) - { - struct flat_constant_def *def; - -@@ -1779,7 +1779,7 @@ static void remove_dead_code(struct vsir_program *program) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_IF: - case VKD3DSIH_LOOP: -@@ -1799,7 +1799,7 @@ static void remove_dead_code(struct vsir_program *program) - { - if (depth > 0) - { -- if (ins->handler_idx != VKD3DSIH_ELSE) -+ if (ins->opcode != VKD3DSIH_ELSE) - --depth; - vkd3d_shader_instruction_make_nop(ins); - } -@@ -1870,14 +1870,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - memset(srcs, 0, sizeof(*srcs) * 3); - -- ins->handler_idx = VKD3DSIH_SAMPLE; -+ ins->opcode = VKD3DSIH_SAMPLE; - - srcs[0] = ins->src[0]; - -@@ -1899,13 +1899,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - ins->src_count = 3; - break; - -+ case VKD3DSIH_TEXLDD: -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ memset(srcs, 0, sizeof(*srcs) * 5); -+ -+ ins->opcode = VKD3DSIH_SAMPLE_GRAD; -+ -+ srcs[0] = ins->src[0]; -+ -+ srcs[1].reg.type = VKD3DSPR_RESOURCE; -+ srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; -+ srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; -+ srcs[1].reg.idx_count = 2; -+ srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; -+ srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ srcs[2].reg.type = VKD3DSPR_SAMPLER; -+ srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; -+ srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; -+ srcs[2].reg.idx_count = 2; -+ srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; -+ -+ srcs[3] = ins->src[2]; -+ srcs[4] = ins->src[3]; -+ -+ ins->src = srcs; -+ ins->src_count = 5; -+ break; -+ - case VKD3DSIH_TEXBEM: - case VKD3DSIH_TEXBEML: - case VKD3DSIH_TEXCOORD: - case VKD3DSIH_TEXDEPTH: - case VKD3DSIH_TEXDP3: - case VKD3DSIH_TEXDP3TEX: -- case VKD3DSIH_TEXLDD: - case VKD3DSIH_TEXLDL: - case VKD3DSIH_TEXM3x2PAD: - case VKD3DSIH_TEXM3x2TEX: -@@ -1919,7 +1948,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " -- "Combined sampler instruction %#x.", ins->handler_idx); -+ "Combined sampler instruction %#x.", ins->opcode); - return VKD3D_ERROR_NOT_IMPLEMENTED; - - default: -@@ -2030,7 +2059,7 @@ static bool 
cf_flattener_copy_instruction(struct cf_flattener *flattener, - { - struct vkd3d_shader_instruction *dst_ins; - -- if (instruction->handler_idx == VKD3DSIH_NOP) -+ if (instruction->opcode == VKD3DSIH_NOP) - return true; - - if (!(dst_ins = cf_flattener_require_space(flattener, 1))) -@@ -2245,9 +2274,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - * phase instruction, and in all other shader types begins with the first label instruction. - * Declaring an indexable temp with function scope is not considered a declaration, - * because it needs to live inside a function. */ -- if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) -+ if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) - { -- bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP -+ bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP - && instruction->declaration.indexable_temp.has_function_scope; - - if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) -@@ -2260,14 +2289,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - cf_info = flattener->control_flow_depth - ? &flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - if (!cf_flattener_copy_instruction(flattener, instruction)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) -+ if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) - after_declarations_section = false; - break; - -@@ -2662,7 +2691,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - unsigned int case_count, j, default_label; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); -@@ -2858,7 +2887,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - - /* Only phi src/dst SSA values need be converted here. Structurisation may - * introduce new cases of undominated SSA use, which will be handled later. 
*/ -- if (ins->handler_idx != VKD3DSIH_PHI) -+ if (ins->opcode != VKD3DSIH_PHI) - continue; - ++phi_count; - -@@ -2907,7 +2936,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); -@@ -3336,7 +3365,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) - if (block->label == 0) - continue; - -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_RET: - shape = "trapezium"; -@@ -3478,7 +3507,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; - bool finish = false; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: -@@ -3533,7 +3562,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - if (block->label == 0) - continue; - -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_RET: - break; -@@ -4192,7 +4221,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - structure->u.block = block; - - /* Generate between zero and two jump instructions. */ -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_BRANCH: - { -@@ -5049,7 +5078,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -@@ -5064,7 +5093,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -- TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); -+ TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); - target.instructions[target.ins_count++] = *ins; - ++i; - if ((ret = vsir_program_structurize_function(program, message_context, -@@ -5222,7 +5251,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -@@ -5237,7 +5266,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -- TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); -+ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); - ++i; - if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, &i)) < 0) -@@ -5641,7 +5670,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, - if (instruction->dst_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, - "Invalid destination count %u for an instruction of type %#x, expected %u.", 
-- instruction->dst_count, instruction->handler_idx, count); -+ instruction->dst_count, instruction->opcode, count); - } - - static void vsir_validate_src_count(struct validation_context *ctx, -@@ -5650,7 +5679,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, - if (instruction->src_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - } - - static bool vsir_validate_src_min_count(struct validation_context *ctx, -@@ -5660,7 +5689,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at least %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - return false; - } - -@@ -5674,7 +5703,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at most %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - return false; - } - -@@ -5701,7 +5730,7 @@ static void vsir_validate_cf_type(struct validation_context *ctx, - assert(expected_type != CF_TYPE_UNKNOWN); - if (ctx->cf_type != expected_type) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", -- instruction->handler_idx, name_from_cf_type(ctx->cf_type)); -+ instruction->opcode, name_from_cf_type(ctx->cf_type)); - } - - static void vsir_validate_instruction(struct validation_context *ctx) -@@ -5718,13 +5747,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) - for (i = 0; i < instruction->src_count; ++i) - vsir_validate_src_param(ctx, &instruction->src[i]); - -- if (instruction->handler_idx >= VKD3DSIH_INVALID) -+ if (instruction->opcode >= VKD3DSIH_INVALID) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", -- instruction->handler_idx); -+ instruction->opcode); - } - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_HS_CONTROL_POINT_PHASE: -@@ -5733,12 +5762,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (version->type != VKD3D_SHADER_TYPE_HULL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -+ "Phase instruction %#x is only valid in a hull shader.", -+ instruction->opcode); - if (ctx->depth != 0) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", -- instruction->handler_idx); -- ctx->phase = instruction->handler_idx; -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Phase instruction %#x must appear to top level.", -+ instruction->opcode); -+ ctx->phase = instruction->opcode; - ctx->dcl_temps_found = false; - return; - -@@ -5812,7 +5843,7 @@ static void 
vsir_validate_instruction(struct validation_context *ctx) - && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", -- instruction->handler_idx); -+ instruction->opcode); - - /* We support two different control flow types in shaders: - * block-based, like DXIL and SPIR-V, and structured, like D3DBC -@@ -5824,7 +5855,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - * block, but need for that hasn't arisen yet, so we don't. */ - if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) - { -- if (instruction->handler_idx == VKD3DSIH_LABEL) -+ if (instruction->opcode == VKD3DSIH_LABEL) - ctx->cf_type = CF_TYPE_BLOCKS; - else - ctx->cf_type = CF_TYPE_STRUCTURED; -@@ -5832,7 +5863,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - - if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) - { -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_LABEL: - if (ctx->inside_block) -@@ -5844,20 +5875,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) - case VKD3DSIH_BRANCH: - case VKD3DSIH_SWITCH_MONOLITHIC: - if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); - ctx->inside_block = false; - break; - - default: - if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); - break; - } - } - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_TEMPS: - vsir_validate_dst_count(ctx, instruction, 0); -@@ -5877,7 +5910,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_IFC: -@@ -5896,7 +5929,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); - else -- ctx->blocks[ctx->depth - 1] = instruction->handler_idx; -+ ctx->blocks[ctx->depth - 1] = instruction->opcode; - break; - - case VKD3DSIH_ENDIF: -@@ -5915,7 +5948,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 
2 : 0); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDLOOP: -@@ -5934,7 +5967,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDREP: -@@ -5953,7 +5986,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDSWITCH: -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index be50d3b9020..a3cdbe559a7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -20,6 +20,7 @@ - - %{ - -+#include "preproc.h" - #include "preproc.tab.h" - - #undef ERROR /* defined in wingdi.h */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 984a4f894f6..524fb8e9b1f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu - return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); - } - -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t op_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t index_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, index_id); -+} -+ - static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t val_id) - { -@@ -6831,7 +6847,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - uint32_t function_id, void_id, function_type_id; - struct vkd3d_shader_phase *phase; - -- assert(compiler->phase != instruction->handler_idx); -+ assert(compiler->phase != instruction->opcode); - - if (!is_in_default_phase(compiler)) - spirv_compiler_leave_shader_phase(compiler); -@@ -6843,16 +6859,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_function(builder, void_id, function_id, - SpvFunctionControlMaskNone, function_type_id); - -- compiler->phase = instruction->handler_idx; -+ 
compiler->phase = instruction->opcode; - spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - -- phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ phase = (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - ? &compiler->control_point_phase : &compiler->patch_constant_phase; - phase->function_id = function_id; - /* The insertion location must be set after the label is emitted. */ - phase->function_location = 0; - -- if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - compiler->emit_default_control_point_phase = instruction->flags; - } - -@@ -7016,7 +7032,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp spirv_op; - } - alu_ops[] = -@@ -7056,7 +7072,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - - for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) - { -- if (alu_ops[i].handler_idx == instruction->handler_idx) -+ if (alu_ops[i].opcode == instruction->opcode) - return alu_ops[i].spirv_op; - } - -@@ -7065,7 +7081,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - - static SpvOp spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) - { -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_AND: - return SpvOpLogicalAnd; -@@ -7090,20 +7106,20 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) - { -- val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); -+ val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); - } - else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) - { - /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ -- val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); -+ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); - } - else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) - { -- val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); -+ val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); - } - else if (dst->reg.data_type == VKD3D_DATA_UINT64) - { -- val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); -+ val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); - } - else - { -@@ -7126,7 +7142,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - SpvOp op = SpvOpMax; - unsigned int i; - -- if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) -+ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) - { - /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ - FIXME("Unsupported 64-bit source for bit count.\n"); -@@ -7142,8 +7158,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - /* VSIR supports logic ops AND/OR/XOR on bool values. */ - op = spirv_compiler_map_logical_instruction(instruction); - } -- else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF -- || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) -+ else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF -+ || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) - { - /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, - * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. */ -@@ -7158,9 +7174,9 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - - if (op == SpvOpMax) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, -- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); -+ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); - return VKD3D_ERROR_INVALID_SHADER; - } - -@@ -7179,8 +7195,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - * Microsoft fxc will compile immediate constants larger than 5 bits. - * Fixing up the constants would be more elegant, but the simplest way is - * to let this handle constants too. */ -- if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL -- || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) -+ if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL -+ || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) - { - uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, - VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); -@@ -7218,7 +7234,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - enum GLSLstd450 glsl_inst; - } - glsl_insts[] = -@@ -7258,7 +7274,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - - for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) - { -- if (glsl_insts[i].handler_idx == instruction->handler_idx) -+ if (glsl_insts[i].opcode == instruction->opcode) - return glsl_insts[i].glsl_inst; - } - -@@ -7276,20 +7292,20 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - unsigned int i, component_count; - enum GLSLstd450 glsl_inst; - -- if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) -+ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI -+ || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) - { - /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ -- FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); -+ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -- "64-bit source for handler %#x is not supported.", instruction->handler_idx); -+ "64-bit source for handler %#x is not supported.", instruction->opcode); - return; - } - - glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); - if (glsl_inst == GLSLstd450Bad) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7306,8 +7322,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, - instr_set_id, glsl_inst, src_id, instruction->src_count); - -- if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) -+ if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI -+ || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) - { - /* In D3D bits are numbered from the most significant bit. */ - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -7415,7 +7431,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, - - if (src[0].reg.data_type != VKD3D_DATA_BOOL) - { -- if (instruction->handler_idx == VKD3DSIH_CMP) -+ if (instruction->opcode == VKD3DSIH_CMP) - condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, - spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); -@@ -7469,9 +7485,9 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, - component_count = vsir_write_mask_component_count(dst->write_mask); - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - -- if (instruction->handler_idx == VKD3DSIH_DP4) -+ if (instruction->opcode == VKD3DSIH_DP4) - write_mask = VKD3DSP_WRITEMASK_ALL; -- else if (instruction->handler_idx == VKD3DSIH_DP3) -+ else if (instruction->opcode == VKD3DSIH_DP3) - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; - else - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; -@@ -7606,8 +7622,8 @@ static void spirv_compiler_emit_int_div(struct spirv_compiler *compiler, - unsigned int component_count = 0; - SpvOp div_op, mod_op; - -- div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; -- mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; -+ div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; -+ mod_op = instruction->opcode == VKD3DSIH_IDIV ? 
SpvOpSRem : SpvOpUMod; - - if (dst[0].reg.type != VKD3DSPR_NULL) - { -@@ -7778,13 +7794,13 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp - mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); - size_id = spirv_compiler_get_constant_uint(compiler, size); - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; - case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; - case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7895,7 +7911,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - unsigned int component_count; - SpvOp op; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DEQO: - case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; -@@ -7916,7 +7932,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; - case VKD3DSIH_ULT: op = SpvOpULessThan; break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7949,7 +7965,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c - src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); - src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); - val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); -- if (instruction->handler_idx == VKD3DSIH_ORD) -+ if (instruction->opcode == VKD3DSIH_ORD) - val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } -@@ -7964,7 +7980,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil - unsigned int component_count; - SpvOp op; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; - case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; -@@ -8262,7 +8278,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - - static const struct instruction_info - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp op; - bool needs_derivative_control; - } -@@ -8279,7 +8295,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - info = NULL; - for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) - { -- if (deriv_instructions[i].handler_idx == instruction->handler_idx) -+ if (deriv_instructions[i].opcode == instruction->opcode) - { - info = &deriv_instructions[i]; - break; -@@ -8287,7 +8303,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - } - if (!info) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -8497,7 +8513,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, - uint32_t coordinate_mask; - bool multisample; - -- multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; -+ multisample = instruction->opcode == VKD3DSIH_LD2DMS; - - spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); - -@@ -8576,7 +8592,7 @@ static void 
spirv_compiler_emit_sample(struct spirv_compiler *compiler, - spirv_compiler_prepare_image(compiler, &image, - &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_SAMPLE: - op = SpvOpImageSampleImplicitLod; -@@ -8603,7 +8619,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - &src[3], VKD3DSP_WRITEMASK_0); - break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -8637,7 +8653,7 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, - uint32_t image_operands[2]; - SpvOp op; - -- if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) -+ if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) - { - op = SpvOpImageSampleDrefExplicitLod; - operands_mask |= SpvImageOperandsLodMask; -@@ -8687,12 +8703,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - uint32_t coordinate_mask; - bool extended_offset; - -- if (instruction->handler_idx == VKD3DSIH_GATHER4_C -- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) -+ if (instruction->opcode == VKD3DSIH_GATHER4_C -+ || instruction->opcode == VKD3DSIH_GATHER4_PO_C) - image_flags |= VKD3D_IMAGE_FLAG_DEPTH; - -- extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO -- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; -+ extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO -+ || instruction->opcode == VKD3DSIH_GATHER4_PO_C; - - addr = &src[0]; - offset = extended_offset ? &src[1] : NULL; -@@ -8963,7 +8979,6 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * - { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); -- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -@@ -9007,7 +9022,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); -- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -@@ -9145,7 +9159,7 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c - uint32_t operands[3]; - SpvOp op; - -- op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC -+ op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC - ? 
SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; - - resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); -@@ -9211,7 +9225,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp spirv_op; - } - atomic_ops[] = -@@ -9240,16 +9254,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins - - for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) - { -- if (atomic_ops[i].handler_idx == instruction->handler_idx) -+ if (atomic_ops[i].opcode == instruction->opcode) - return atomic_ops[i].spirv_op; - } - - return SpvOpMax; - } - --static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) -+static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) - { -- return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; -+ return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; - } - - static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, -@@ -9274,12 +9288,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - bool raw; - SpvOp op; - -- resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; -+ resource = is_imm_atomic_instruction(instruction->opcode) ? &dst[1] : &dst[0]; - - op = spirv_compiler_map_atomic_instruction(instruction); - if (op == SpvOpMax) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -9360,7 +9374,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - { - WARN("Ignoring 'volatile' attribute.\n"); - spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, -- "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); -+ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); - } - - memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) -@@ -9379,7 +9393,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, - op, type_id, operands, i); - -- if (is_imm_atomic_instruction(instruction->handler_idx)) -+ if (is_imm_atomic_instruction(instruction->opcode)) - spirv_compiler_emit_store_dst(compiler, dst, result_id); - } - -@@ -9684,13 +9698,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, - - src_ids[src_count++] = register_info.id; - -- if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) -+ if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) - { - op = GLSLstd450InterpolateAtCentroid; - } - else - { -- assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); -+ assert(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); - op = GLSLstd450InterpolateAtSample; - src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); - } -@@ -9772,7 +9786,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int stream_idx; - -- if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) -+ if (instruction->opcode == VKD3DSIH_EMIT_STREAM) - stream_idx = instruction->src[0].reg.idx[0].offset; - else - stream_idx = 0; -@@ -9793,7 +9807,7 @@ static void 
spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int stream_idx; - -- if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) -+ if (instruction->opcode == VKD3DSIH_CUT_STREAM) - stream_idx = instruction->src[0].reg.idx[0].offset; - else - stream_idx = 0; -@@ -9807,9 +9821,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_end_primitive(builder); - } - --static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) -+static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) - { -- switch (handler_idx) -+ switch (opcode) -+ { -+ case VKD3DSIH_QUAD_READ_ACROSS_X: -+ return 0; -+ case VKD3DSIH_QUAD_READ_ACROSS_Y: -+ return 1; -+ case VKD3DSIH_QUAD_READ_ACROSS_D: -+ return 2; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, direction_type_id, direction_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ direction_id = map_quad_read_across_direction(instruction->opcode); -+ direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id, lane_id; -+ -+ if (!register_is_constant_or_undef(&src[1].reg)) -+ { -+ FIXME("Unsupported non-constant quad read lane index.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "Non-constant quad read lane indices are not supported."); -+ return; -+ } -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) -+{ -+ switch (opcode) - { - case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: - return SpvOpGroupNonUniformAllEqual; -@@ -9833,7 +9906,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, - - vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); - -- op = map_wave_bool_op(instruction->handler_idx); -+ op = map_wave_bool_op(instruction->opcode); - type_id = vkd3d_spirv_get_op_type_bool(builder); - 
val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, -@@ -9865,9 +9938,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - --static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) -+static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) - { -- switch (handler_idx) -+ switch (opcode) - { - case VKD3DSIH_WAVE_ACTIVE_BIT_AND: - return SpvOpGroupNonUniformBitwiseAnd; -@@ -9905,7 +9978,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, - uint32_t type_id, val_id; - SpvOp op; - -- op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); -+ op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); - - type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); -@@ -9928,7 +10001,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, - SpvGroupOperation group_op; - uint32_t type_id, val_id; - -- group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan -+ group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan - : SpvGroupOperationReduce; - - val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); -@@ -10014,7 +10087,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - - compiler->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_GLOBAL_FLAGS: - spirv_compiler_emit_dcl_global_flags(compiler, instruction); -@@ -10337,6 +10410,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_QUAD_READ_ACROSS_D: -+ case VKD3DSIH_QUAD_READ_ACROSS_X: -+ case VKD3DSIH_QUAD_READ_ACROSS_Y: -+ spirv_compiler_emit_quad_read_across(compiler, instruction); -+ break; -+ case VKD3DSIH_QUAD_READ_LANE_AT: -+ spirv_compiler_emit_quad_read_lane_at(compiler, instruction); -+ break; - case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: - case VKD3DSIH_WAVE_ALL_TRUE: - case VKD3DSIH_WAVE_ANY_TRUE: -@@ -10390,9 +10471,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - /* nothing to do */ - break; - default: -- FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); -+ FIXME("Unhandled instruction %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, -- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); -+ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); - break; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index b562e815a81..a7c37215e5e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -780,7 +780,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - return; - } - -@@ -789,7 +789,7 @@ 
static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - -@@ -797,7 +797,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - icb->register_idx = 0; -@@ -2395,16 +2395,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) - { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); -- if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE -- || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -- sm4->phase = ins->handler_idx; -- sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+ if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE -+ || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) -+ sm4->phase = ins->opcode; -+ sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; - ins->flags = 0; - ins->coissue = false; - ins->raw = false; -@@ -2417,7 +2417,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -@@ -2459,7 +2459,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) -@@ -2467,7 +2467,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; -@@ -2478,7 +2478,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - } -@@ -2488,7 +2488,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - - fail: - *ptr = sm4->end; -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - -@@ -2693,7 +2693,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(&sm4, ins); 
- -- if (ins->handler_idx == VKD3DSIH_INVALID) -+ if (ins->opcode == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - vsir_program_cleanup(program); -@@ -2762,6 +2762,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, -+ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) -@@ -2817,6 +2818,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -+ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, -@@ -2984,11 +2986,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - return D3D_SVC_VECTOR; - - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -2997,6 +3001,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: - break; - } - vkd3d_unreachable(); -@@ -3178,13 +3183,14 @@ struct extern_resource - /* var is only not NULL if this resource is a whole variable, so it may be responsible for more - * than one component. 
*/ - const struct hlsl_ir_var *var; -+ const struct hlsl_buffer *buffer; - - char *name; - struct hlsl_type *data_type; - bool is_user_packed; - - enum hlsl_regset regset; -- unsigned int id, bind_count; -+ unsigned int id, space, index, bind_count; - }; - - static int sm4_compare_extern_resources(const void *a, const void *b) -@@ -3196,7 +3202,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) - if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) - return r; - -- return vkd3d_u32_compare(aa->id, bb->id); -+ if ((r = vkd3d_u32_compare(aa->space, bb->space))) -+ return r; -+ -+ return vkd3d_u32_compare(aa->index, bb->index); - } - - static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -@@ -3220,6 +3229,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; - const struct hlsl_ir_var *var; -+ struct hlsl_buffer *buffer; - enum hlsl_regset regset; - size_t capacity = 0; - char *name; -@@ -3272,13 +3282,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - hlsl_release_string_buffer(ctx, name_buffer); - - extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; - extern_resources[*count].data_type = component_type; - extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; - - extern_resources[*count].regset = regset; -- extern_resources[*count].id = var->regs[regset].id + regset_offset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].space = var->regs[regset].space; -+ extern_resources[*count].index = var->regs[regset].index + regset_offset; - extern_resources[*count].bind_count = 1; - - ++*count; -@@ -3313,13 +3326,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - } - - extern_resources[*count].var = var; -+ extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; - extern_resources[*count].data_type = var->data_type; -- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ /* For some reason 5.1 resources aren't marked as -+ * user-packed, but cbuffers still are. 
*/ -+ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) -+ && !!var->reg_reservation.reg_type; - - extern_resources[*count].regset = r; - extern_resources[*count].id = var->regs[r].id; -+ extern_resources[*count].space = var->regs[r].space; -+ extern_resources[*count].index = var->regs[r].index; - extern_resources[*count].bind_count = var->bind_count[r]; - - ++*count; -@@ -3327,14 +3346,51 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - } - } - -+ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!buffer->reg.allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, buffer->name))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = buffer; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].data_type = NULL; -+ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; -+ -+ extern_resources[*count].regset = HLSL_REGSET_NUMERIC; -+ extern_resources[*count].id = buffer->reg.id; -+ extern_resources[*count].space = buffer->reg.space; -+ extern_resources[*count].index = buffer->reg.index; -+ extern_resources[*count].bind_count = 1; -+ -+ ++*count; -+ } -+ - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; - } - - static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { -- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; -+ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -+ unsigned int cbuffer_count = 0, extern_resources_count, i, j; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - struct vkd3d_bytecode_buffer buffer = {0}; -@@ -3354,19 +3410,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - -- resource_count += extern_resources_count; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- { - ++cbuffer_count; -- ++resource_count; -- } - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); -- put_u32(&buffer, resource_count); -+ put_u32(&buffer, extern_resources_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); -@@ -3378,7 +3430,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ -+ put_u32(&buffer, binding_desc_size); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -@@ -3395,21 +3447,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); -- - if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, sm4_resource_type(resource->data_type)); -- if (resource->regset == HLSL_REGSET_SAMPLERS) -- { -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- } -+ if (resource->buffer) -+ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - else -+ put_u32(&buffer, sm4_resource_type(resource->data_type)); -+ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) - { - unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; - -@@ -3418,32 +3464,21 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } -- put_u32(&buffer, resource->id); -+ else -+ { -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ } -+ put_u32(&buffer, resource->index); - put_u32(&buffer, resource->bind_count); - put_u32(&buffer, flags); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- uint32_t flags = 0; -- -- if (!cbuffer->reg.allocated) -- continue; - - if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); -- -- if (cbuffer->reservation.reg_type) -- flags |= D3D_SIF_USERPACKED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -- put_u32(&buffer, 0); /* return type */ -- put_u32(&buffer, 0); /* dimension */ -- put_u32(&buffer, 0); /* multisample count */ -- put_u32(&buffer, cbuffer->reg.id); /* bind point */ -- put_u32(&buffer, 1); /* bind count */ -- put_u32(&buffer, flags); /* flags */ -+ { -+ put_u32(&buffer, resource->space); -+ put_u32(&buffer, resource->id); -+ } - } - - for (i = 0; i < extern_resources_count; ++i) -@@ -3451,16 +3486,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - - string_offset = put_string(&buffer, resource->name); -- set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!cbuffer->reg.allocated) -- continue; -- -- string_offset = put_string(&buffer, cbuffer->name); -- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); -+ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); - } - - /* Buffers. 
*/ -@@ -3522,7 +3548,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ -- put_u32(&buffer, 0); /* FIXME: default value */ -+ put_u32(&buffer, 0); /* default value */ - - if (profile->major_version >= 5) - { -@@ -3546,6 +3572,34 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); -+ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset; -+ enum hlsl_regset regset; -+ -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ -+ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), -+ var->default_values[k].value.u); -+ } -+ } -+ } - ++j; - } - } -@@ -3720,30 +3774,57 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } - assert(regset == HLSL_REGSET_TEXTURES); -- reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } - assert(regset == HLSL_REGSET_UAVS); -- reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; -- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[1].offset = 
var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } - assert(regset == HLSL_REGSET_SAMPLERS); -- reg->idx_count = 1; - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else -@@ -3753,9 +3834,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - assert(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->buffer->reg.id; -- reg->idx[1].offset = offset / 4; -- reg->idx_count = 2; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -+ reg->idx[2].offset = offset / 4; -+ reg->idx_count = 3; -+ } -+ else -+ { -+ reg->idx[0].offset = var->buffer->reg.index; -+ reg->idx[1].offset = offset / 4; -+ reg->idx_count = 2; -+ } - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } - } -@@ -4139,18 +4230,36 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - - static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) - { -- const struct sm4_instruction instr = -+ size_t size = (cbuffer->used_size + 3) / 4; -+ -+ struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -- .srcs[0].reg.idx[0].offset = cbuffer->reg.id, -- .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, -- .srcs[0].reg.idx_count = 2, - .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, - .src_count = 1, - }; -+ -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; -+ instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ -+ instr.srcs[0].reg.idx_count = 3; -+ -+ instr.idx[0] = size; -+ instr.idx[1] = cbuffer->reg.space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[1].offset = size; -+ instr.srcs[0].reg.idx_count = 2; -+ } -+ - write_sm4_instruction(tpf, &instr); - } - -@@ -4163,7 +4272,6 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3DSPR_SAMPLER, -- .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - -@@ -4179,7 +4287,22 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - -- instr.dsts[0].reg.idx[0].offset = resource->id + i; -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ assert(!i); -+ instr.dsts[0].reg.idx[0].offset = resource->id; -+ instr.dsts[0].reg.idx[1].offset = resource->index; -+ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[0] = resource->space; -+ instr.idx_count = 1; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = resource->index + i; -+ instr.dsts[0].reg.idx_count = 1; -+ } - write_sm4_instruction(tpf, &instr); - } - } -@@ -4212,6 +4335,23 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - .idx_count = 1, - }; - -+ if (hlsl_version_ge(tpf->ctx, 
5, 1)) -+ { -+ assert(!i); -+ instr.dsts[0].reg.idx[0].offset = resource->id; -+ instr.dsts[0].reg.idx[1].offset = resource->index; -+ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[1] = resource->space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = resource->index + i; -+ instr.dsts[0].reg.idx_count = 1; -+ } -+ - if (uav) - { - switch (resource->data_type->sampler_dim) -@@ -4904,6 +5044,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - -+static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; -+ instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ - static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - const struct hlsl_ir_node *arg1 = expr->operands[0].node; -@@ -4919,6 +5078,14 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - - switch (expr->op) - { -+ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -+ if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) -+ write_sm4_rasterizer_sample_count(tpf, &expr->node); -+ else -+ hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -+ break; -+ - case HLSL_OP1_ABS: - switch (dst_type->e.numeric.type) - { -@@ -5799,21 +5966,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- { -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); -- - write_sm4_dcl_constant_buffer(&tpf, cbuffer); -- } - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); -- - if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -@@ -5875,7 +6034,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - for (unsigned int i = 0; i < extern_resources_count; ++i) - { -- if (extern_resources[i].data_type->e.resource.rasteriser_ordered) -+ if (extern_resources[i].data_type && extern_resources[i].data_type->e.resource.rasteriser_ordered) - *flags |= VKD3D_SM4_REQUIRES_ROVS; - } - sm4_free_extern_resources(extern_resources, extern_resources_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 14a3fa778e5..fdbde019111 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 
+23,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ - static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex - - static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) -- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) -- || handler_idx == VKD3DSIH_LD_UAV_TYPED -- || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) -- || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) -+ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) -+ || opcode == VKD3DSIH_LD_UAV_TYPED -+ || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) -+ || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); - } - - static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, -@@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * - - static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC -- || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; - } - - static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, -@@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex - - static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) -- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) -+ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); - } - - static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, -@@ -1130,7 +1134,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - - context->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_CONSTANT_BUFFER: - vkd3d_shader_scan_constant_buffer_declaration(context, instruction); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 29b8d6ad022..96e613669a6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -151,6 +151,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, - VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, - 
VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, -+ VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -455,6 +457,10 @@ enum vkd3d_shader_opcode - VKD3DSIH_PHASE, - VKD3DSIH_PHI, - VKD3DSIH_POW, -+ VKD3DSIH_QUAD_READ_ACROSS_D, -+ VKD3DSIH_QUAD_READ_ACROSS_X, -+ VKD3DSIH_QUAD_READ_ACROSS_Y, -+ VKD3DSIH_QUAD_READ_LANE_AT, - VKD3DSIH_RCP, - VKD3DSIH_REP, - VKD3DSIH_RESINFO, -@@ -805,6 +811,7 @@ enum vkd3d_tessellator_domain - - #define VKD3DSI_NONE 0x0 - #define VKD3DSI_TEXLD_PROJECT 0x1 -+#define VKD3DSI_TEXLD_BIAS 0x2 - #define VKD3DSI_INDEXED_DYNAMIC 0x4 - #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 - #define VKD3DSI_RESINFO_UINT 0x2 -@@ -1189,7 +1196,7 @@ struct vkd3d_shader_location - struct vkd3d_shader_instruction - { - struct vkd3d_shader_location location; -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - uint32_t flags; - unsigned int dst_count; - unsigned int src_count; -@@ -1238,8 +1245,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns - return v->major < major || (v->major == major && v->minor <= minor); - } - --void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx); -+void vsir_instruction_init(struct vkd3d_shader_instruction *ins, -+ const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); - - static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) - { -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 95366d3441b..2354938c08d 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2025,7 +2025,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l - - static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, - const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, -- VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) -+ VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout, -+ struct d3d12_device *device) - { - bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); - VkPipelineStageFlags queue_shader_stages = 0; -@@ -2033,10 +2034,12 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) - { - queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT -- | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT -- | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT -- | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT - | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; -+ if (device->vk_info.geometry_shaders) -+ queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; -+ if (device->vk_info.tessellation_shaders) -+ queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT -+ | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; - } - if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) - queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; -@@ -2054,7 +2057,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - { - if (resource->present_state != 
D3D12_RESOURCE_STATE_PRESENT) - return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, -- resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); -+ resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); - - *access_mask = VK_ACCESS_MEMORY_READ_BIT; - *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -@@ -2251,7 +2254,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 - VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; - - if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, -- resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) -+ resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, -+ &dst_stage_mask, &barrier.newLayout, list->device)) - { - FIXME("Unhandled state %#x.\n", resource->initial_state); - return; -@@ -4277,13 +4281,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - } - - if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, -- resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) -+ resource, list->vk_queue_flags, vk_info, &src_access_mask, -+ &src_stage_mask, &layout_before, list->device)) - { - FIXME("Unhandled state %#x.\n", state_before); - continue; - } - if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, -- resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) -+ resource, list->vk_queue_flags, vk_info, &dst_access_mask, -+ &dst_stage_mask, &layout_after, list->device)) - { - FIXME("Unhandled state %#x.\n", state_after); - continue; -@@ -4303,7 +4309,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - - resource = unsafe_impl_from_ID3D12Resource(uav->pResource); - vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, -- resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); -+ resource, list->vk_queue_flags, vk_info, &access_mask, -+ &stage_mask, &image_layout, list->device); - src_access_mask = dst_access_mask = access_mask; - src_stage_mask = dst_stage_mask = stage_mask; - layout_before = layout_after = image_layout; -@@ -4814,15 +4821,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - return; - } - -- if (!views) -- { -- WARN("NULL \"views\" pointer specified.\n"); -- return; -- } -- - for (i = 0; i < view_count; ++i) - { -- if (views[i].BufferLocation) -+ if (views && views[i].BufferLocation) - { - resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); - buffers[i] = resource->u.vk_buffer; -@@ -5434,6 +5435,52 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - } - } - -+static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, -+ struct d3d12_resource *resource, VkClearColorValue *colour) -+{ -+ struct vkd3d_texture_view_desc view_desc; -+ const struct vkd3d_format *uint_format; -+ struct vkd3d_view *uint_view; -+ -+ if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) -+ && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) -+ { -+ ERR("Unhandled format %#x.\n", view->format->dxgi_format); -+ return NULL; -+ } -+ -+ if 
(d3d12_resource_is_buffer(resource)) -+ { -+ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, -+ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) -+ { -+ ERR("Failed to create buffer view.\n"); -+ return NULL; -+ } -+ -+ return uint_view; -+ } -+ -+ memset(&view_desc, 0, sizeof(view_desc)); -+ view_desc.view_type = view->info.texture.vk_view_type; -+ view_desc.format = uint_format; -+ view_desc.miplevel_idx = view->info.texture.miplevel_idx; -+ view_desc.miplevel_count = 1; -+ view_desc.layer_idx = view->info.texture.layer_idx; -+ view_desc.layer_count = view->info.texture.layer_count; -+ view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; -+ view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; -+ -+ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, -+ resource->u.vk_image, &view_desc, &uint_view)) -+ { -+ ERR("Failed to create image view.\n"); -+ return NULL; -+ } -+ -+ return uint_view; -+} -+ - static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) -@@ -5441,8 +5488,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; -- struct vkd3d_texture_view_desc view_desc; -- const struct vkd3d_format *uint_format; - const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; -@@ -5456,44 +5501,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - view = &descriptor->v; - memcpy(colour.uint32, values, sizeof(colour.uint32)); - -- if (view->format->type != VKD3D_FORMAT_TYPE_UINT) -+ if (view->format->type != VKD3D_FORMAT_TYPE_UINT -+ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) - { -- if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) -- && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) -- { -- ERR("Unhandled format %#x.\n", view->format->dxgi_format); -- return; -- } -- -- if (d3d12_resource_is_buffer(resource_impl)) -- { -- if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, -- uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) -- { -- ERR("Failed to create buffer view.\n"); -- return; -- } -- } -- else -- { -- memset(&view_desc, 0, sizeof(view_desc)); -- view_desc.view_type = view->info.texture.vk_view_type; -- view_desc.format = uint_format; -- view_desc.miplevel_idx = view->info.texture.miplevel_idx; -- view_desc.miplevel_count = 1; -- view_desc.layer_idx = view->info.texture.layer_idx; -- view_desc.layer_count = view->info.texture.layer_count; -- view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; -- view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; -- -- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, -- &uint_view)) -- { -- ERR("Failed to create image view.\n"); -- return; -- } -- } -- descriptor = uint_view; -+ ERR("Failed to create UINT view.\n"); -+ return; - } - - d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); 
-@@ -5507,19 +5519,32 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct vkd3d_view *descriptor, *uint_view = NULL; -+ struct d3d12_device *device = list->device; -+ const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; -- struct vkd3d_view *view; - - TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", - iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); - - resource_impl = unsafe_impl_from_ID3D12Resource(resource); -- if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) -+ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) - return; -+ view = &descriptor->v; - memcpy(colour.float32, values, sizeof(colour.float32)); - -- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); -+ if (view->format->type == VKD3D_FORMAT_TYPE_SINT -+ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) -+ { -+ ERR("Failed to create UINT view.\n"); -+ return; -+ } -+ -+ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); -+ -+ if (uint_view) -+ vkd3d_view_decref(uint_view, device); - } - - static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index cfc9c5f5ed3..2bbc170504e 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = - VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - }; - -+/* In general we don't want to enable Vulkan beta extensions, but make an -+ * exception for VK_KHR_portability_subset because we draw no real feature from -+ * it, but it's still useful to be able to develop for MoltenVK without being -+ * spammed with validation errors. 
*/ -+#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME -+#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" -+#endif -+ - static const struct vkd3d_optional_extension_info optional_device_extensions[] = - { - /* KHR extensions */ -@@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), - VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), - VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), -+ VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), - VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), - VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), - VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), -@@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), - VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), - VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), -- VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), -+ VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), - VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), - VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), - VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), -@@ -1634,6 +1643,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; -+ vulkan_info->tessellation_shaders = physical_device_info->features2.features.tessellationShader; - vulkan_info->sparse_binding = features->sparseBinding; - vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; -@@ -3806,7 +3817,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - return E_INVALIDARG; - } - -- data->UnalignedBlockTexturesSupported = FALSE; -+ /* Vulkan does not restrict block texture alignment. 
*/ -+ data->UnalignedBlockTexturesSupported = TRUE; - - TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); - return S_OK; -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index c897d9f2c5a..7d7f40c0953 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d - return false; - } - -- if (align(desc->Width, format->block_width) != desc->Width -- || align(desc->Height, format->block_height) != desc->Height) -- { -- WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", -- desc->Width, desc->Height, desc->Format); -- return false; -- } -- - return true; - } - -@@ -4357,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript - return hr; - - descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); -- d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); -+ if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) -+ { -+ vkd3d_private_store_destroy(&descriptor_heap->private_store); -+ return hr; -+ } - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - - d3d12_device_add_ref(descriptor_heap->device = device); -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index d1fa866d9e3..7acd39d65be 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -123,6 +123,7 @@ struct vkd3d_vulkan_info - bool KHR_image_format_list; - bool KHR_maintenance2; - bool KHR_maintenance3; -+ bool KHR_portability_subset; - bool KHR_push_descriptor; - bool KHR_sampler_mirror_clamp_to_edge; - bool KHR_timeline_semaphore; -@@ -145,6 +146,8 @@ struct vkd3d_vulkan_info - - bool rasterization_stream; - bool transform_feedback_queries; -+ bool geometry_shaders; -+ bool tessellation_shaders; - - bool uav_read_without_format; - --- -2.43.0 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch deleted file mode 100644 index 0f9929e7..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-5a53b739959db74e8dcce023a7d49356b90.patch +++ /dev/null @@ -1,1053 +0,0 @@ -From e7e5e0e85ccf11074ec59e202e30924fbad10df2 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Mon, 15 Jul 2024 10:03:30 +1000 -Subject: [PATCH] Updated vkd3d to 5a53b739959db74e8dcce023a7d49356b9008e92. 
- ---- - libs/vkd3d/include/vkd3d_shader.h | 217 ++++++++++++++ - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 11 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 12 + - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 280 +++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 99 ++++++- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 46 ++- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 8 +- - 9 files changed, 649 insertions(+), 28 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d3afcc11b16..4acb622468a 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -105,6 +105,11 @@ enum vkd3d_shader_structure_type - * \since 1.10 - */ - VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO, -+ /** -+ * The structure is a vkd3d_shader_parameter_info structure. -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -453,44 +458,167 @@ enum vkd3d_shader_binding_flag - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), - }; - -+/** -+ * The manner in which a parameter value is provided to the shader, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - enum vkd3d_shader_parameter_type - { - VKD3D_SHADER_PARAMETER_TYPE_UNKNOWN, -+ /** The parameter value is embedded directly in the shader. */ - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, -+ /** -+ * The parameter value is provided to the shader via a specialization -+ * constant. This value is only supported for the SPIR-V target type. -+ */ - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, -+ /** -+ * The parameter value is provided to the shader as part of a uniform -+ * buffer. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_TYPE_BUFFER, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_TYPE), - }; - -+/** -+ * The format of data provided to the shader, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - enum vkd3d_shader_parameter_data_type - { - VKD3D_SHADER_PARAMETER_DATA_TYPE_UNKNOWN, -+ /** The parameter is provided as a 32-bit unsigned integer. */ - VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, -+ /** The parameter is provided as a 32-bit float. \since 1.13 */ -+ VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), - }; - -+/** -+ * Names a specific shader parameter, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - enum vkd3d_shader_parameter_name - { - VKD3D_SHADER_PARAMETER_NAME_UNKNOWN, -+ /** -+ * The sample count of the framebuffer, as returned by the HLSL function -+ * GetRenderTargetSampleCount() or the GLSL builtin gl_NumSamples. -+ * -+ * This parameter should be specified when compiling to SPIR-V, which -+ * provides no builtin ability to query this information from the shader. -+ * -+ * The default value is 1. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ */ - VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, -+ /** -+ * Alpha test comparison function. When this parameter is provided, if the -+ * alpha component of the pixel shader colour output at location 0 fails the -+ * test, as defined by this function and the reference value provided by -+ * VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, the fragment will be -+ * discarded. 
-+ * -+ * This parameter, along with VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, -+ * can be used to implement fixed function alpha test, as present in -+ * Direct3D versions up to 9, if the target environment does not support -+ * alpha test as part of its own fixed-function API (as Vulkan and core -+ * OpenGL). -+ * -+ * The default value is VKD3D_SHADER_COMPARISON_FUNC_ALWAYS. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. The value specified must be -+ * a member of enum vkd3d_shader_comparison_func. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC, -+ /** -+ * Alpha test reference value. -+ * See VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC for documentation of -+ * alpha test. -+ * -+ * The default value is zero. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; - -+/** -+ * The value of an immediate constant parameter, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - struct vkd3d_shader_parameter_immediate_constant - { - union - { -+ /** -+ * The value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ */ - uint32_t u32; -+ /** -+ * The value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * \since 1.13 -+ */ -+ float f32; - } u; - }; - -+/** -+ * The linkage of a specialization constant parameter, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - struct vkd3d_shader_parameter_specialization_constant - { -+ /** The ID of the specialization constant. */ - uint32_t id; - }; - -+/** -+ * The linkage of a parameter specified through a uniform buffer, used in -+ * struct vkd3d_shader_parameter1. -+ */ -+struct vkd3d_shader_parameter_buffer -+{ -+ /** -+ * The set of the uniform buffer descriptor. If the target environment does -+ * not support descriptor sets, this value must be set to 0. -+ */ -+ unsigned int set; -+ /** The binding index of the uniform buffer descriptor. */ -+ unsigned int binding; -+ /** The byte offset of the parameter within the buffer. */ -+ uint32_t offset; -+}; -+ -+/** -+ * An individual shader parameter. -+ * -+ * This structure is an earlier version of struct vkd3d_shader_parameter1 -+ * which supports fewer parameter types; -+ * refer to that structure for usage information. -+ * -+ * Only the following types may be used with this structure: -+ * -+ * - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT -+ * - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT -+ */ - struct vkd3d_shader_parameter - { - enum vkd3d_shader_parameter_name name; -@@ -503,6 +631,56 @@ struct vkd3d_shader_parameter - } u; - }; - -+/** -+ * An individual shader parameter. -+ * -+ * This structure is used in struct vkd3d_shader_parameter_info; see there for -+ * explanation of shader parameters. 
-+ * -+ * For example, to specify the rasterizer sample count to the shader via an -+ * unsigned integer specialization constant with ID 3, -+ * set the following members: -+ * -+ * - \a name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT -+ * - \a type = VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT -+ * - \a data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32 -+ * - \a u.specialization_constant.id = 3 -+ * -+ * This structure is an extended version of struct vkd3d_shader_parameter. -+ */ -+struct vkd3d_shader_parameter1 -+{ -+ /** The builtin parameter to be mapped. */ -+ enum vkd3d_shader_parameter_name name; -+ /** How the parameter will be provided to the shader. */ -+ enum vkd3d_shader_parameter_type type; -+ /** -+ * The data type of the supplied parameter, which determines how it is to -+ * be interpreted. -+ */ -+ enum vkd3d_shader_parameter_data_type data_type; -+ union -+ { -+ /** -+ * Additional information if \a type is -+ * VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT. -+ */ -+ struct vkd3d_shader_parameter_immediate_constant immediate_constant; -+ /** -+ * Additional information if \a type is -+ * VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT. -+ */ -+ struct vkd3d_shader_parameter_specialization_constant specialization_constant; -+ /** -+ * Additional information if \a type is -+ * VKD3D_SHADER_PARAMETER_TYPE_BUFFER. -+ */ -+ struct vkd3d_shader_parameter_buffer buffer; -+ void *_pointer_pad; -+ uint32_t _pad[4]; -+ } u; -+}; -+ - /** - * Symbolic register indices for mapping uniform constant register sets in - * legacy Direct3D bytecode to constant buffer views in the target environment. -@@ -1994,6 +2172,44 @@ struct vkd3d_shader_varying_map_info - unsigned int varying_count; - }; - -+/** -+ * Interface information regarding a builtin shader parameter. -+ * -+ * Like compile options specified with struct vkd3d_shader_compile_option, -+ * parameters are used to specify certain values which are not part of the -+ * source shader bytecode but which need to be specified in the shader bytecode -+ * in the target format. -+ * Unlike struct vkd3d_shader_compile_option, however, this structure allows -+ * parameters to be specified in a variety of different ways, as described by -+ * enum vkd3d_shader_parameter_type. -+ * -+ * This structure is an extended version of struct vkd3d_shader_parameter as -+ * used in struct vkd3d_shader_spirv_target_info, which allows more parameter -+ * types to be used, and also allows specifying parameters when compiling -+ * shaders to target types other than SPIR-V. If this structure is chained -+ * along with vkd3d_shader_spirv_target_info, any parameters specified in the -+ * latter structure are ignored. -+ * -+ * This structure is passed to vkd3d_shader_compile() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * This structure contains only input parameters. -+ * -+ * \since 1.13 -+ */ -+struct vkd3d_shader_parameter_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** Pointer to an array of dynamic parameters for this shader instance. */ -+ const struct vkd3d_shader_parameter1 *parameters; -+ /** Size, in elements, of \ref parameters. 
*/ -+ unsigned int parameter_count; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -2077,6 +2293,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - vkd3d_shader_descriptor_offset_info - * - vkd3d_shader_hlsl_source_info - * - vkd3d_shader_interface_info -+ * - vkd3d_shader_parameter_info - * - vkd3d_shader_preprocess_info - * - vkd3d_shader_scan_combined_resource_sampler_info - * - vkd3d_shader_scan_descriptor_info -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 4522d56c5c9..abfbd461b33 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -1272,7 +1272,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - sm1->end = &code[token_count]; - - /* Estimate instruction count to avoid reallocation in most shaders. */ -- if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) -+ if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 2176debc7d2..bf581928a9e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -10206,12 +10206,13 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 - return NULL; - } - --static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) - { - size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; -+ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; - struct shader_signature *patch_constant_signature, *output_signature, *input_signature; -- const struct vkd3d_shader_location location = {.source_name = source_name}; - uint32_t version_token, dxil_version, token_count, magic; - const uint32_t *byte_code = dxbc_desc->byte_code; - unsigned int chunk_offset, chunk_size; -@@ -10302,9 +10303,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - count = max(token_count, 400) - 400; -- if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) -+ if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); -+ vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); - sm6->ptr = &sm6->start[1]; - sm6->bitpos = 2; - -@@ -10565,7 +10566,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - dxbc_desc.byte_code = byte_code; - } - -- ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); -+ ret = sm6_parser_init(&sm6, program, compile_info, message_context, &dxbc_desc); - free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(byte_code); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 7b058a65bc1..56736a65306 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -4065,6 +4065,17 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, rad, loc); - } - -+static bool intrinsic_rcp(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); -+} -+ - static bool intrinsic_reflect(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4760,6 +4771,7 @@ intrinsic_functions[] = - {"normalize", 1, true, intrinsic_normalize}, - {"pow", 2, true, intrinsic_pow}, - {"radians", 1, true, intrinsic_radians}, -+ {"rcp", 1, true, intrinsic_rcp}, - {"reflect", 2, true, intrinsic_reflect}, - {"refract", 3, true, intrinsic_refract}, - {"round", 1, true, intrinsic_round}, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 7e4f168675e..02884df9d76 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -5691,7 +5691,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl - version.major = ctx->profile->major_version; - version.minor = ctx->profile->minor_version; - version.type = ctx->profile->type; -- if (!vsir_program_init(program, &version, 0)) -+ if (!vsir_program_init(program, NULL, &version, 0)) - { - ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; - return; -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index e5432cb35ce..be9e4219d6a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -19,9 +19,73 @@ - #include "vkd3d_shader_private.h" - #include "vkd3d_types.h" - --bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) -+static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, -+ unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) -+{ -+ const struct vkd3d_shader_spirv_target_info *spirv_info; -+ struct vkd3d_shader_parameter1 *parameters; -+ -+ *ret_count = 0; -+ *ret_parameters = NULL; -+ -+ if (!(spirv_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO)) || 
!spirv_info->parameter_count) -+ return VKD3D_OK; -+ -+ if (!(parameters = vkd3d_calloc(spirv_info->parameter_count, sizeof(*parameters)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (unsigned int i = 0; i < spirv_info->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter *src = &spirv_info->parameters[i]; -+ struct vkd3d_shader_parameter1 *dst = ¶meters[i]; -+ -+ dst->name = src->name; -+ dst->type = src->type; -+ dst->data_type = src->data_type; -+ -+ if (src->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ dst->u.immediate_constant = src->u.immediate_constant; -+ } -+ else if (src->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) -+ { -+ dst->u.specialization_constant = src->u.specialization_constant; -+ } -+ else -+ { -+ ERR("Invalid parameter type %#x.\n", src->type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ } -+ -+ *ret_count = spirv_info->parameter_count; -+ *ret_parameters = parameters; -+ -+ return VKD3D_OK; -+} -+ -+bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_version *version, unsigned int reserve) - { - memset(program, 0, sizeof(*program)); -+ -+ if (compile_info) -+ { -+ const struct vkd3d_shader_parameter_info *parameter_info; -+ -+ if ((parameter_info = vkd3d_find_struct(compile_info->next, PARAMETER_INFO))) -+ { -+ program->parameter_count = parameter_info->parameter_count; -+ program->parameters = parameter_info->parameters; -+ } -+ else -+ { -+ if (convert_parameter_info(compile_info, &program->parameter_count, &program->parameters) < 0) -+ return false; -+ program->free_parameters = true; -+ } -+ } -+ - program->shader_version = *version; - return shader_instruction_array_init(&program->instructions, reserve); - } -@@ -30,6 +94,8 @@ void vsir_program_cleanup(struct vsir_program *program) - { - size_t i; - -+ if (program->free_parameters) -+ vkd3d_free((void *)program->parameters); - for (i = 0; i < program->block_name_count; ++i) - vkd3d_free((void *)program->block_names[i]); - vkd3d_free(program->block_names); -@@ -666,6 +732,12 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne - dst->write_mask = VKD3DSP_WRITEMASK_0; - } - -+static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) - { - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -@@ -678,6 +750,12 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 - src->reg.u.immconst_u32[0] = value; - } - -+static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -+{ -+ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode opcode) - { -@@ -5282,6 +5360,203 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - return VKD3D_OK; - } - -+static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) -+{ -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET -+ && 
!signature->elements[i].register_index) -+ { -+ *index = i; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, -+ const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ -+ static const struct -+ { -+ enum vkd3d_shader_opcode float_opcode; -+ enum vkd3d_shader_opcode uint_opcode; -+ bool swap; -+ } -+ opcodes[] = -+ { -+ [VKD3D_SHADER_COMPARISON_FUNC_EQUAL] = {VKD3DSIH_EQO, VKD3DSIH_IEQ}, -+ [VKD3D_SHADER_COMPARISON_FUNC_NOT_EQUAL] = {VKD3DSIH_NEO, VKD3DSIH_INE}, -+ [VKD3D_SHADER_COMPARISON_FUNC_GREATER_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE}, -+ [VKD3D_SHADER_COMPARISON_FUNC_LESS] = {VKD3DSIH_LTO, VKD3DSIH_ULT}, -+ [VKD3D_SHADER_COMPARISON_FUNC_LESS_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE, true}, -+ [VKD3D_SHADER_COMPARISON_FUNC_GREATER] = {VKD3DSIH_LTO, VKD3DSIH_ULT, true}, -+ }; -+ -+ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) -+ { -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &program->instructions.elements[pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; -+ src_param_init_const_uint(&ins->src[0], 0); -+ -+ *ret_pos = pos + 1; -+ return VKD3D_OK; -+ } -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ switch (ref->data_type) -+ { -+ case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: -+ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); -+ src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); -+ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); -+ break; -+ -+ case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: -+ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); -+ src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); -+ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 
0 : 1], -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); -+ break; -+ -+ default: -+ FIXME("Unhandled parameter data type %#x.\n", ref->data_type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ dst_param_init_ssa_bool(&ins->dst[0], program->ssa_count); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ -+ ++ins; -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; -+ src_param_init_ssa_bool(&ins->src[0], program->ssa_count); -+ -+ ++program->ssa_count; -+ -+ ++ins; -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = colour_signature_idx; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].write_mask = program->output_signature.elements[colour_signature_idx].mask; -+ src_param_init_temp_float(&ins->src[0], colour_temp); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ *ret_pos = pos + 3; -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ enum vkd3d_shader_comparison_func compare_func; -+ uint32_t colour_signature_idx, colour_temp; -+ struct vkd3d_shader_instruction *ins; -+ size_t new_pos; -+ int ret; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; -+ -+ if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) -+ || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) -+ return VKD3D_OK; -+ -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ -+ if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC) -+ func = parameter; -+ else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF) -+ ref = parameter; -+ } -+ -+ if (!func || !ref) -+ return VKD3D_OK; -+ -+ if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported alpha test function parameter type %#x.\n", func->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid alpha test function parameter data type %#x.\n", func->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ compare_func = func->u.immediate_constant.u.u32; -+ -+ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_ALWAYS) -+ return VKD3D_OK; -+ -+ /* We're going to be reading from the output, so we need to go -+ * through the whole shader and convert it to a temp. 
*/ -+ -+ if (compare_func != VKD3D_SHADER_COMPARISON_FUNC_NEVER) -+ colour_temp = program->temp_count++; -+ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, -+ ref, colour_signature_idx, colour_temp, &new_pos)) < 0) -+ return ret; -+ i = new_pos; -+ continue; -+ } -+ -+ /* No need to convert it if the comparison func is NEVER; we don't -+ * read from the output in that case. */ -+ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) -+ continue; -+ -+ for (size_t j = 0; j < ins->dst_count; ++j) -+ { -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; -+ -+ /* Note we run after I/O normalization. */ -+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) -+ { -+ dst->reg.type = VKD3DSPR_TEMP; -+ dst->reg.idx[0].offset = colour_temp; -+ } -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -6274,6 +6549,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - return result; - } - -+ if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) -+ return result; -+ - if (TRACE_ON()) - vkd3d_shader_trace(program); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 524fb8e9b1f..72a6f1e60dc 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2418,6 +2418,13 @@ struct spirv_compiler - uint32_t *descriptor_offset_ids; - struct vkd3d_push_constant_buffer_binding *push_constants; - const struct vkd3d_shader_spirv_target_info *spirv_target_info; -+ const struct vkd3d_shader_parameter1 *parameters; -+ unsigned int parameter_count; -+ -+ struct -+ { -+ uint32_t buffer_id; -+ } *spirv_parameter_info; - - bool prolog_emitted; - struct shader_signature input_signature; -@@ -3290,16 +3297,15 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); - } - --static const struct vkd3d_shader_parameter *spirv_compiler_get_shader_parameter( -+static const struct vkd3d_shader_parameter1 *spirv_compiler_get_shader_parameter( - struct spirv_compiler *compiler, enum vkd3d_shader_parameter_name name) - { -- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - unsigned int i; - -- for (i = 0; info && i < info->parameter_count; ++i) -+ for (i = 0; i < compiler->parameter_count; ++i) - { -- if (info->parameters[i].name == name) -- return &info->parameters[i]; -+ if (compiler->parameters[i].name == name) -+ return &compiler->parameters[i]; - } - - return NULL; -@@ -3314,6 +3320,7 @@ static const struct vkd3d_spec_constant_info - vkd3d_shader_parameters[] = - { - {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, -+ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, - }; - - static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) -@@ -3352,7 +3359,7 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com - } - - static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, uint32_t spec_id) -+ enum 
vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_spec_constant_info *info; -@@ -3361,7 +3368,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile - info = get_spec_constant_info(name); - default_value = info ? info->default_value : 0; - -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -+ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); - id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); - -@@ -3380,7 +3387,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile - } - - static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, uint32_t spec_id) -+ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) - { - unsigned int i; - -@@ -3390,13 +3397,29 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler - return compiler->spec_constants[i].id; - } - -- return spirv_compiler_emit_spec_constant(compiler, name, spec_id); -+ return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); -+} -+ -+static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ unsigned int index = parameter - compiler->parameters; -+ uint32_t type_id, ptr_id, ptr_type_id; -+ -+ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); -+ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); -+ ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, -+ compiler->spirv_parameter_info[index].buffer_id, -+ spirv_compiler_get_constant_uint(compiler, 0)); -+ return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); - } - --static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler *compiler, -+static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name) - { -- const struct vkd3d_shader_parameter *parameter; -+ const struct vkd3d_shader_parameter1 *parameter; -+ enum vkd3d_data_type type = VKD3D_DATA_UINT; - - if (!(parameter = spirv_compiler_get_shader_parameter(compiler, name))) - { -@@ -3405,15 +3428,28 @@ static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler - } - - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -- return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); -+ { -+ if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); -+ else -+ return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); -+ } -+ -+ if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ type = VKD3D_DATA_FLOAT; -+ else -+ type = VKD3D_DATA_UINT; -+ - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) -- return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id); -+ return 
spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); -+ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) -+ return spirv_compiler_get_buffer_parameter(compiler, parameter, type); - - FIXME("Unhandled parameter type %#x.\n", parameter->type); - - default_parameter: - return spirv_compiler_get_spec_constant(compiler, -- name, spirv_compiler_alloc_spec_constant_id(compiler)); -+ name, spirv_compiler_alloc_spec_constant_id(compiler), type); - } - - static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, -@@ -4188,6 +4224,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); - else if (reg->type == VKD3DSPR_UNDEF) - return spirv_compiler_emit_load_undef(compiler, reg, write_mask); -+ else if (reg->type == VKD3DSPR_PARAMETER) -+ return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset); - - component_count = vsir_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -8129,6 +8167,8 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, - if (src->reg.data_type != VKD3D_DATA_BOOL) - condition_id = spirv_compiler_emit_int_to_bool(compiler, - instruction->flags, src->reg.data_type, 1, condition_id); -+ else if (instruction->flags & VKD3D_SHADER_CONDITIONAL_OP_Z) -+ condition_id = vkd3d_spirv_build_op_logical_not(builder, vkd3d_spirv_get_op_type_bool(builder), condition_id); - void_id = vkd3d_spirv_get_op_type_void(builder); - vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), - &condition_id, 1); -@@ -9525,7 +9565,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co - - if (src->reg.type == VKD3DSPR_RASTERIZER) - { -- val_id = spirv_compiler_emit_uint_shader_parameter(compiler, -+ val_id = spirv_compiler_emit_shader_parameter(compiler, - VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); - } - else -@@ -10570,6 +10610,35 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - - spirv_compiler_emit_descriptor_declarations(compiler); - -+ compiler->parameter_count = program->parameter_count; -+ compiler->parameters = program->parameters; -+ compiler->spirv_parameter_info = vkd3d_calloc(compiler->parameter_count, sizeof(*compiler->spirv_parameter_info)); -+ for (i = 0; i < compiler->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &compiler->parameters[i]; -+ -+ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) -+ { -+ uint32_t type_id, struct_id, ptr_type_id, var_id; -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -+ -+ struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); -+ vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); -+ vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, -+ SpvDecorationOffset, parameter->u.buffer.offset); -+ -+ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, struct_id); -+ var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, -+ ptr_type_id, SpvStorageClassUniform, 0); -+ -+ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationDescriptorSet, parameter->u.buffer.set); -+ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationBinding, parameter->u.buffer.binding); -+ -+ 
compiler->spirv_parameter_info[i].buffer_id = var_id; -+ } -+ } -+ - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index a7c37215e5e..3a9a402e8e2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -2493,7 +2493,7 @@ fail: - } - - static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, -- const uint32_t *byte_code, size_t byte_code_size, const char *source_name, -+ const uint32_t *byte_code, size_t byte_code_size, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_version version; -@@ -2552,9 +2552,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate instruction count to avoid reallocation in most shaders. */ -- if (!vsir_program_init(program, &version, token_count / 7u + 20)) -+ if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) - return false; -- vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); -+ vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); - sm4->ptr = sm4->start; - - init_sm4_lookup_tables(&sm4->lookup); -@@ -2651,7 +2651,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - } - - if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, -- compile_info->source_name, message_context)) -+ compile_info, message_context)) - { - WARN("Failed to initialise shader parser.\n"); - free_dxbc_shader_desc(&dxbc_desc); -@@ -5189,6 +5189,44 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - break; - -+ case HLSL_OP1_RCP: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ /* SM5 comes with a RCP opcode */ -+ if (tpf->ctx->profile->major_version >= 5) -+ { -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); -+ } -+ else -+ { -+ /* For SM4, implement as DIV dst, 1.0, src */ -+ struct sm4_instruction instr; -+ struct hlsl_constant_value one; -+ -+ assert(type_is_float(dst_type)); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_DIV; -+ -+ sm4_dst_from_node(&instr.dsts[0], &expr->node); -+ instr.dst_count = 1; -+ -+ for (unsigned int i = 0; i < 4; i++) -+ one.u[i].f = 1.0f; -+ sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); -+ sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+ } -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); -+ } -+ break; -+ - case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 96e613669a6..bf9759ebbbf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -619,6 +619,7 @@ enum vkd3d_shader_register_type - VKD3DSPR_SSA, - VKD3DSPR_WAVELANECOUNT, - VKD3DSPR_WAVELANEINDEX, -+ VKD3DSPR_PARAMETER, - - VKD3DSPR_COUNT, - -@@ -1362,6 +1363,10 @@ struct 
vsir_program - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; - -+ unsigned int parameter_count; -+ const struct vkd3d_shader_parameter1 *parameters; -+ bool free_parameters; -+ - unsigned int input_control_point_count, output_control_point_count; - unsigned int flat_constant_count[3]; - unsigned int block_count; -@@ -1377,7 +1382,8 @@ void vsir_program_cleanup(struct vsir_program *program); - int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); --bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); -+bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_version *version, unsigned int reserve); - enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, --- -2.43.0 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-c8cc1b1a2476a4c518756fd7604d37e8c16.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-c8cc1b1a2476a4c518756fd7604d37e8c16.patch new file mode 100644 index 00000000..5418c3ef --- /dev/null +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-c8cc1b1a2476a4c518756fd7604d37e8c16.patch @@ -0,0 +1,1777 @@ +From 5942f59d8cade40d6bb21f8c46fc05fcee79dbb3 Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 20 Aug 2024 06:47:55 +1000 +Subject: [PATCH] Updated vkd3d to c8cc1b1a2476a4c518756fd7604d37e8c1611af3. 
+ +--- + libs/vkd3d/include/private/vkd3d_memory.h | 3 +- + libs/vkd3d/include/vkd3d_shader.h | 2 +- + libs/vkd3d/libs/vkd3d-common/debug.c | 1 - + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 10 +- + libs/vkd3d/libs/vkd3d-shader/dxil.c | 4 +- + libs/vkd3d/libs/vkd3d-shader/fx.c | 389 +++++++++++++++--- + libs/vkd3d/libs/vkd3d-shader/glsl.c | 6 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.c | 45 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 10 +- + libs/vkd3d/libs/vkd3d-shader/hlsl.l | 2 + + libs/vkd3d/libs/vkd3d-shader/hlsl.y | 125 ++++-- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 2 + + libs/vkd3d/libs/vkd3d-shader/ir.c | 36 +- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 94 +++-- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 1 - + libs/vkd3d/libs/vkd3d/resource.c | 2 +- + libs/vkd3d/libs/vkd3d/state.c | 12 +- + libs/vkd3d/libs/vkd3d/vkd3d_private.h | 3 +- + 19 files changed, 582 insertions(+), 167 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h +index 682d35c03c6..e191dc11b73 100644 +--- a/libs/vkd3d/include/private/vkd3d_memory.h ++++ b/libs/vkd3d/include/private/vkd3d_memory.h +@@ -19,7 +19,6 @@ + #ifndef __VKD3D_MEMORY_H + #define __VKD3D_MEMORY_H + +-#include + #include + #include + #include +@@ -44,7 +43,7 @@ static inline void *vkd3d_realloc(void *ptr, size_t size) + static inline void *vkd3d_calloc(size_t count, size_t size) + { + void *ptr; +- assert(count <= ~(size_t)0 / size); ++ VKD3D_ASSERT(count <= ~(size_t)0 / size); + if (!(ptr = calloc(count, size))) + ERR("Out of memory.\n"); + return ptr; +diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h +index d4756810065..d37d8ebad9e 100644 +--- a/libs/vkd3d/include/vkd3d_shader.h ++++ b/libs/vkd3d/include/vkd3d_shader.h +@@ -1876,7 +1876,7 @@ enum vkd3d_shader_sysval_semantic + VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX = 0x05, + /** Vertex ID; SV_VertexID in Direct3D. */ + VKD3D_SHADER_SV_VERTEX_ID = 0x06, +- /** Primtive ID; SV_PrimitiveID in Direct3D. */ ++ /** Primitive ID; SV_PrimitiveID in Direct3D. */ + VKD3D_SHADER_SV_PRIMITIVE_ID = 0x07, + /** Instance ID; SV_InstanceID in Direct3D. */ + VKD3D_SHADER_SV_INSTANCE_ID = 0x08, +diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c +index 4bfc19bd9a1..9a92f0ead02 100644 +--- a/libs/vkd3d/libs/vkd3d-common/debug.c ++++ b/libs/vkd3d/libs/vkd3d-common/debug.c +@@ -22,7 +22,6 @@ + + #include "vkd3d_common.h" + +-#include + #include + #include + #include +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +index 2c2f0c43ece..77e9711300f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +@@ -2251,7 +2251,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; + /* SV_Coverage has name vCoverage when used as an input, +- * but it doens't appear in the signature in that case. */ ++ * but it doesn't appear in the signature in that case. 
*/ + case VKD3D_SHADER_SV_COVERAGE: return "oMask"; + case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; + default: return "??"; +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index a4c038a233a..d05394c3ab7 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1523,6 +1523,8 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + break; + } + +@@ -1626,6 +1628,8 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + break; + } + +@@ -1826,17 +1830,17 @@ void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buff + break; + + case HLSL_TYPE_INT: +- uni.f = var->default_values[k].value.i; ++ uni.f = var->default_values[k].number.i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: +- uni.f = var->default_values[k].value.u; ++ uni.f = var->default_values[k].number.u; + break; + + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- uni.u = var->default_values[k].value.u; ++ uni.u = var->default_values[k].number.u; + break; + + default: +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c +index 2a0bbe1a625..4a17c62292b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c +@@ -4298,7 +4298,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco + if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) + ins->flags |= VKD3DSI_PRECISE_X; + flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; +- /* SPIR-V FPFastMathMode is only available in the Kernel executon model. */ ++ /* SPIR-V FPFastMathMode is only available in the Kernel execution model. */ + silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); + break; + case VKD3DSIH_IADD: +@@ -5211,7 +5211,7 @@ static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_in + instruction_dst_param_init_temp_vector(ins++, sm6); + state->temp_idx = 1; + +- /* DXIL does not have an instrinsic for sample info, and resinfo is expected to return ++ /* DXIL does not have an intrinsic for sample info, and resinfo is expected to return + * the sample count in .w for MS textures. The result is always a struct of 4 x uint32. 
*/ + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); + ins->flags = VKD3DSI_SAMPLE_INFO_UINT; +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index e3ebbafb3f4..a1d1fd6572f 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -111,7 +111,7 @@ static void get_state_block_function_components(const struct state_block_functio + { + unsigned int i; + +- assert(comp_count <= info->max_args); ++ VKD3D_ASSERT(comp_count <= info->max_args); + + if (info->min_args == info->max_args) + { +@@ -205,6 +205,8 @@ struct fx_write_context + uint32_t sampler_state_count; + uint32_t depth_stencil_state_count; + uint32_t rasterizer_state_count; ++ uint32_t blend_state_count; ++ uint32_t string_count; + int status; + + bool child_effect; +@@ -565,6 +567,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + case HLSL_CLASS_PIXEL_SHADER: + return "PixelShader"; + ++ case HLSL_CLASS_STRING: ++ return "String"; ++ + default: + return type->name; + } +@@ -636,6 +641,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_STRING: + put_u32_unaligned(buffer, 2); + break; + +@@ -648,9 +655,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_NULL: + vkd3d_unreachable(); + +- case HLSL_CLASS_STRING: + case HLSL_CLASS_VOID: + FIXME("Writing type class %u is not implemented.\n", type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); +@@ -754,6 +761,14 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + { + put_u32_unaligned(buffer, 3); + } ++ else if (type->class == HLSL_CLASS_BLEND_STATE) ++ { ++ put_u32_unaligned(buffer, 2); ++ } ++ else if (type->class == HLSL_CLASS_STRING) ++ { ++ put_u32_unaligned(buffer, 1); ++ } + else if (hlsl_is_numeric_type(type)) + { + numeric_desc = get_fx_4_numeric_type_description(type, fx); +@@ -879,6 +894,13 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f + return offset; + } + ++static uint32_t get_fx_2_type_class(const struct hlsl_type *type) ++{ ++ if (type->class == HLSL_CLASS_MATRIX) ++ return D3DXPC_MATRIX_ROWS; ++ return hlsl_sm1_class(type); ++} ++ + static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, + struct fx_write_context *fx) + { +@@ -897,7 +919,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n + semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; + + offset = put_u32(buffer, hlsl_sm1_base_type(type)); +- put_u32(buffer, hlsl_sm1_class(type)); ++ put_u32(buffer, get_fx_2_type_class(type)); + put_u32(buffer, name_offset); + put_u32(buffer, semantic_offset); + put_u32(buffer, elements_count); +@@ -1078,12 +1100,14 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: + return false; + + case HLSL_CLASS_EFFECT_GROUP: + case HLSL_CLASS_PASS: + case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_NULL: + /* This cannot appear as an extern variable. 
*/ + break; + } +@@ -1234,7 +1258,7 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl + + for (j = 0; j < comp_count; ++j) + { +- put_u32_unaligned(buffer, value->value.u); ++ put_u32_unaligned(buffer, value->number.u); + value++; + } + break; +@@ -1264,6 +1288,27 @@ static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hl + return offset; + } + ++static void write_fx_4_string_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) ++{ ++ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; ++ const struct hlsl_default_value *value = var->default_values; ++ struct vkd3d_bytecode_buffer *buffer = &fx->structured; ++ struct hlsl_ctx *ctx = fx->ctx; ++ uint32_t offset; ++ ++ if (!value) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "String objects have to be initialized."); ++ return; ++ } ++ ++ for (i = 0; i < elements_count; ++i, ++value) ++ { ++ offset = write_fx_4_string(value->string, fx); ++ put_u32(buffer, offset); ++ } ++} ++ + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) + { + struct vkd3d_bytecode_buffer *buffer = &fx->structured; +@@ -1322,6 +1367,10 @@ static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_conte + offset = write_fx_4_default_value(var->data_type, var->default_values, fx); + put_u32(buffer, offset); + } ++ else if (type->class == HLSL_CLASS_STRING) ++ { ++ write_fx_4_string_initializer(var, fx); ++ } + else + { + hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); +@@ -1429,17 +1478,28 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl + set_u32(buffer, rhs_offset, value_offset); + } + +-static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) ++static bool state_block_contains_state(const struct hlsl_state_block_entry *entry, unsigned int start_index, ++ struct hlsl_state_block *block) + { + unsigned int i; + +- for (i = start; i < block->count; ++i) ++ for (i = start_index; i < block->count; ++i) + { +- if (block->entries[i]->is_function_call) ++ const struct hlsl_state_block_entry *cur = block->entries[i]; ++ ++ if (cur->is_function_call) + continue; + +- if (!ascii_strcasecmp(block->entries[i]->name, name)) +- return true; ++ if (ascii_strcasecmp(cur->name, entry->name)) ++ continue; ++ ++ if (cur->lhs_has_index != entry->lhs_has_index) ++ continue; ++ ++ if (cur->lhs_has_index && cur->lhs_index != entry->lhs_index) ++ continue; ++ ++ return true; + } + + return false; +@@ -1451,6 +1511,24 @@ struct replace_state_context + struct hlsl_ir_var *var; + }; + ++static bool lower_null_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) ++{ ++ struct hlsl_ir_node *c; ++ ++ if (instr->type != HLSL_IR_CONSTANT) ++ return false; ++ if (instr->data_type->class != HLSL_CLASS_NULL) ++ return false; ++ ++ if (!(c = hlsl_new_uint_constant(ctx, 0, &instr->loc))) ++ return false; ++ ++ list_add_before(&instr->entry, &c->entry); ++ hlsl_replace_node(instr, c); ++ ++ return true; ++} ++ + static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) + { + struct replace_state_context *replace_context = context; +@@ -1480,17 +1558,6 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no + return true; + } + +-static void fold_state_value(struct hlsl_ctx *ctx, 
struct hlsl_state_block_entry *entry) +-{ +- bool progress; +- +- do +- { +- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); +- progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); +- } while (progress); +-} +- + enum state_property_component_type + { + FX_BOOL, +@@ -1505,6 +1572,9 @@ enum state_property_component_type + FX_TEXTURE, + FX_DEPTHSTENCILVIEW, + FX_RENDERTARGETVIEW, ++ FX_BLEND, ++ FX_VERTEXSHADER, ++ FX_PIXELSHADER, + }; + + static inline bool is_object_fx_type(enum state_property_component_type type) +@@ -1519,6 +1589,9 @@ static inline bool is_object_fx_type(enum state_property_component_type type) + case FX_TEXTURE: + case FX_RENDERTARGETVIEW: + case FX_DEPTHSTENCILVIEW: ++ case FX_BLEND: ++ case FX_VERTEXSHADER: ++ case FX_PIXELSHADER: + return true; + default: + return false; +@@ -1545,6 +1618,12 @@ static inline enum hlsl_type_class hlsl_type_class_from_fx_type(enum state_prope + return HLSL_CLASS_RENDER_TARGET_VIEW; + case FX_DEPTHSTENCILVIEW: + return HLSL_CLASS_DEPTH_STENCIL_VIEW; ++ case FX_BLEND: ++ return HLSL_CLASS_BLEND_STATE; ++ case FX_VERTEXSHADER: ++ return HLSL_CLASS_VERTEX_SHADER; ++ case FX_PIXELSHADER: ++ return HLSL_CLASS_PIXEL_SHADER; + default: + vkd3d_unreachable(); + } +@@ -1663,6 +1742,51 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + { NULL } + }; + ++ static const struct rhs_named_value blend_values[] = ++ { ++ { "ZERO", 1 }, ++ { "ONE", 2 }, ++ { "SRC_COLOR", 3 }, ++ { "INV_SRC_COLOR", 4 }, ++ { "SRC_ALPHA", 5 }, ++ { "INV_SRC_ALPHA", 6 }, ++ { "DEST_ALPHA", 7 }, ++ { "INV_DEST_ALPHA", 8 }, ++ { "DEST_COLOR", 9 }, ++ { "INV_DEST_COLOR", 10 }, ++ { "SRC_ALPHA_SAT", 11 }, ++ { "BLEND_FACTOR", 14 }, ++ { "INV_BLEND_FACTOR", 15 }, ++ { "SRC1_COLOR", 16 }, ++ { "INV_SRC1_COLOR", 17 }, ++ { "SRC1_ALPHA", 18 }, ++ { "INV_SRC1_ALPHA", 19 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value blendop_values[] = ++ { ++ { "ADD", 1 }, ++ { "SUBTRACT", 2 }, ++ { "REV_SUBTRACT", 3 }, ++ { "MIN", 4 }, ++ { "MAX", 5 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value bool_values[] = ++ { ++ { "FALSE", 0 }, ++ { "TRUE", 1 }, ++ { NULL } ++ }; ++ ++ static const struct rhs_named_value null_values[] = ++ { ++ { "NULL", 0 }, ++ { NULL } ++ }; ++ + static const struct state + { + const char *name; +@@ -1676,29 +1800,33 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + } + states[] = + { +- { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, +- { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, +- ++ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, ++ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, ++ { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, + { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, + { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, + +- { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, ++ { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, ++ { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, ++ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, ++ { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, ++ { "AB_SampleMask", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, + + { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, + { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, +- { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14 }, ++ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, + { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, + { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, + { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, +- { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18 }, +- { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19 }, +- { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20 }, +- { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21 }, ++ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, ++ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, ++ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, ++ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, + +- { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22 }, ++ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, + { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, + { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, +- { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25 }, ++ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, + { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, + { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, + { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, +@@ -1720,12 +1848,45 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, + { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, + { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, +- { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55 }, ++ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, + + { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, + { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, + { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, + }; ++ ++ static const struct state fx_4_blend_states[] = ++ { ++ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, ++ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, ++ { "SrcBlend", HLSL_CLASS_BLEND_STATE, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, ++ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, ++ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, ++ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, ++ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, ++ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, ++ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, ++ }; ++ ++ static const struct state fx_5_blend_states[] = ++ { ++ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, ++ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, ++ { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 38, blend_values }, ++ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 39, blend_values }, ++ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 40, blendop_values }, ++ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 41, blend_values }, ++ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 42, blend_values }, ++ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 43, blendop_values }, ++ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, ++ }; ++ ++ struct state_table ++ { ++ const struct state *ptr; ++ unsigned int count; ++ } table; ++ + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct replace_state_context replace_context; + struct hlsl_type *state_type = NULL; +@@ -1733,15 +1894,33 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + const struct state *state = NULL; + struct hlsl_ctx *ctx = fx->ctx; + enum hlsl_base_type base_type; +- struct hlsl_ir_load *load; + unsigned int i; + +- for (i = 0; i < ARRAY_SIZE(states); ++i) ++ if (type->class == HLSL_CLASS_BLEND_STATE) + { +- if (type->class == states[i].container +- && !ascii_strcasecmp(entry->name, states[i].name)) ++ if (ctx->profile->major_version == 4) + { +- state = &states[i]; ++ table.ptr = fx_4_blend_states; ++ table.count = ARRAY_SIZE(fx_4_blend_states); ++ } ++ else ++ { ++ table.ptr = fx_5_blend_states; ++ table.count = ARRAY_SIZE(fx_5_blend_states); ++ } ++ } ++ else ++ { ++ table.ptr = states; ++ table.count = ARRAY_SIZE(states); ++ } ++ ++ for (i = 0; i < table.count; ++i) ++ { ++ if (type->class == table.ptr[i].container ++ && !ascii_strcasecmp(entry->name, table.ptr[i].name)) ++ { ++ state = &table.ptr[i]; + break; + } + } +@@ -1786,8 +1965,9 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + replace_context.var = var; + + /* Turn named constants to actual constants. */ ++ hlsl_transform_ir(ctx, lower_null_constant, entry->instrs, NULL); + hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); +- fold_state_value(ctx, entry); ++ hlsl_run_const_passes(ctx, entry->instrs); + + /* Now cast and run folding again. 
*/ + +@@ -1798,7 +1978,8 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + switch (node->type) + { + case HLSL_IR_LOAD: +- load = hlsl_ir_load(node); ++ { ++ struct hlsl_ir_load *load = hlsl_ir_load(node); + + if (load->src.path_len) + hlsl_fixme(ctx, &ctx->location, "Arrays are not supported for RHS."); +@@ -1810,6 +1991,26 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + } + + break; ++ } ++ case HLSL_IR_CONSTANT: ++ { ++ struct hlsl_ir_constant *c = hlsl_ir_constant(node); ++ struct hlsl_type *data_type = c->node.data_type; ++ ++ if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) ++ { ++ if (c->value.u[0].u != 0) ++ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Only 0 integer constants are allowed for object-typed fields."); ++ } ++ else ++ { ++ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, ++ "Unexpected constant used for object-typed field."); ++ } ++ ++ break; ++ } + default: + hlsl_fixme(ctx, &ctx->location, "Unhandled node type for object-typed field."); + } +@@ -1857,11 +2058,27 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl + hlsl_src_remove(entry->args); + hlsl_src_from_node(entry->args, cast); + +- fold_state_value(ctx, entry); ++ hlsl_run_const_passes(ctx, entry->instrs); + } + } + +-static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct hlsl_state_block *block, ++static bool decompose_fx_4_state_add_entries(struct hlsl_state_block *block, unsigned int entry_index, ++ unsigned int count) ++{ ++ if (!vkd3d_array_reserve((void **)&block->entries, &block->capacity, block->count + count, sizeof(*block->entries))) ++ return false; ++ ++ if (entry_index != block->count - 1) ++ { ++ memmove(&block->entries[entry_index + count + 1], &block->entries[entry_index + 1], ++ (block->count - entry_index - 1) * sizeof(*block->entries)); ++ } ++ block->count += count; ++ ++ return true; ++} ++ ++static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, struct hlsl_state_block *block, + unsigned int entry_index, struct fx_write_context *fx) + { + struct hlsl_state_block_entry *entry = block->entries[entry_index]; +@@ -1891,15 +2108,8 @@ static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct h + return 1; + } + +- if (!vkd3d_array_reserve((void **)&block->entries, &block->capacity, block->count + entry->args_count - 1, +- sizeof(*block->entries))) ++ if (!decompose_fx_4_state_add_entries(block, entry_index, entry->args_count - 1)) + return 1; +- if (entry_index != block->count - 1) +- { +- memmove(&block->entries[entry_index + entry->args_count], &block->entries[entry_index + 1], +- (block->count - entry_index - 1) * sizeof(*block->entries)); +- } +- block->count += entry->args_count - 1; + + get_state_block_function_components(info, components, entry->args_count); + +@@ -1915,6 +2125,62 @@ static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct h + return entry->args_count; + } + ++/* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState ++ object, and only when fx_5_0 profile is used. 
*/ ++static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, ++ unsigned int entry_index, struct fx_write_context *fx) ++{ ++ static const char *states[] = { "SrcBlend", "DestBlend", "BlendOp", "SrcBlendAlpha", "DestBlendAlpha", "BlendOpAlpha" }; ++ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); ++ struct hlsl_state_block_entry *entry = block->entries[entry_index]; ++ static const unsigned int array_size = 8; ++ struct hlsl_ctx *ctx = fx->ctx; ++ bool found = false; ++ unsigned int i; ++ ++ if (type->class != HLSL_CLASS_BLEND_STATE) ++ return 1; ++ if (ctx->profile->major_version != 5) ++ return 1; ++ if (entry->lhs_has_index) ++ return 1; ++ ++ for (i = 0; i < ARRAY_SIZE(states); ++i) ++ { ++ if (!ascii_strcasecmp(entry->name, states[i])) ++ { ++ found = true; ++ break; ++ } ++ } ++ ++ if (!found) ++ return 1; ++ ++ if (!decompose_fx_4_state_add_entries(block, entry_index, array_size - 1)) ++ return 1; ++ ++ block->entries[entry_index]->lhs_has_index = true; ++ for (i = 1; i < array_size; ++i) ++ { ++ block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, ++ entry->name, true, i, 0); ++ } ++ ++ return array_size; ++} ++ ++static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct hlsl_state_block *block, ++ unsigned int entry_index, struct fx_write_context *fx) ++{ ++ struct hlsl_state_block_entry *entry = block->entries[entry_index]; ++ ++ if (entry->is_function_call) ++ return decompose_fx_4_state_function_call(var, block, entry_index, fx); ++ ++ return decompose_fx_4_state_block_expand_array(var, block, entry_index, fx); ++} ++ + static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, + uint32_t count_offset, struct fx_write_context *fx) + { +@@ -1936,7 +2202,7 @@ static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_i + struct hlsl_state_block_entry *entry = block->entries[i]; + + /* Skip if property is reassigned later. This will use the last assignment. */ +- if (state_block_contains_state(entry->name, i + 1, block)) ++ if (state_block_contains_state(entry, i + 1, block)) + continue; + + /* Resolve special constant names and property names. 
*/ +@@ -2069,6 +2335,16 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ + fx->rasterizer_state_count += elements_count; + break; + ++ case HLSL_CLASS_BLEND_STATE: ++ write_fx_4_state_object_initializer(var, fx); ++ fx->blend_state_count += elements_count; ++ break; ++ ++ case HLSL_CLASS_STRING: ++ write_fx_4_string_initializer(var, fx); ++ fx->string_count += elements_count; ++ break; ++ + default: + hlsl_fixme(ctx, &ctx->location, "Writing initializer for object class %u is not implemented.", + type->class); +@@ -2170,6 +2446,9 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_VERTEX_SHADER: ++ case HLSL_CLASS_STRING: + return true; + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: +@@ -2183,8 +2462,6 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc + if (type->e.resource.rasteriser_ordered) + return false; + return true; +- case HLSL_CLASS_VERTEX_SHADER: +- return true; + + default: + return false; +@@ -2237,10 +2514,10 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, fx.shared_object_count); + put_u32(&buffer, fx.technique_count); + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ +- put_u32(&buffer, 0); /* String count. */ ++ put_u32(&buffer, fx.string_count); + put_u32(&buffer, fx.texture_count); + put_u32(&buffer, fx.depth_stencil_state_count); +- put_u32(&buffer, 0); /* Blend state count. */ ++ put_u32(&buffer, fx.blend_state_count); + put_u32(&buffer, fx.rasterizer_state_count); + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); +@@ -2295,10 +2572,10 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) + put_u32(&buffer, fx.shared_object_count); + put_u32(&buffer, fx.technique_count); + size_offset = put_u32(&buffer, 0); /* Unstructured size. */ +- put_u32(&buffer, 0); /* String count. */ ++ put_u32(&buffer, fx.string_count); + put_u32(&buffer, fx.texture_count); + put_u32(&buffer, fx.depth_stencil_state_count); +- put_u32(&buffer, 0); /* Blend state count. 
*/ ++ put_u32(&buffer, fx.blend_state_count); + put_u32(&buffer, fx.rasterizer_state_count); + put_u32(&buffer, fx.sampler_state_count); + put_u32(&buffer, fx.rtv_count); +diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c +index 10e12ea56f2..d1f02ab568b 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c +@@ -115,7 +115,7 @@ static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator + + if (reg->non_uniform) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Internal compiler error: Unhandled 'non-uniform' modifer."); ++ "Internal compiler error: Unhandled 'non-uniform' modifier."); + if (vsir_src->modifiers) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); +@@ -138,10 +138,10 @@ static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_gener + + if (ins->flags & VKD3DSI_PRECISE_XYZW) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Internal compiler error: Unhandled 'precise' modifer."); ++ "Internal compiler error: Unhandled 'precise' modifier."); + if (vsir_dst->reg.non_uniform) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, +- "Internal compiler error: Unhandled 'non-uniform' modifer."); ++ "Internal compiler error: Unhandled 'non-uniform' modifier."); + + glsl_dst->vsir = vsir_dst; + glsl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +index 7f85195382d..bd5baacd83d 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c +@@ -167,7 +167,14 @@ void hlsl_free_var(struct hlsl_ir_var *decl) + for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) + vkd3d_free((void *)decl->objects_usage[k]); + +- vkd3d_free(decl->default_values); ++ if (decl->default_values) ++ { ++ unsigned int component_count = hlsl_type_component_count(decl->data_type); ++ ++ for (k = 0; k < component_count; ++k) ++ vkd3d_free((void *)decl->default_values[k].string); ++ vkd3d_free(decl->default_values); ++ } + + for (i = 0; i < decl->state_block_count; ++i) + hlsl_free_state_block(decl->state_blocks[i]); +@@ -385,6 +392,8 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + break; + } + } +@@ -459,6 +468,8 @@ static bool type_is_single_component(const struct hlsl_type *type) + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + return true; + + case HLSL_CLASS_VECTOR: +@@ -615,6 +626,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: + VKD3D_ASSERT(idx == 0); + break; + +@@ -624,6 +636,7 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty + case HLSL_CLASS_VOID: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_CONSTANT_BUFFER: ++ case HLSL_CLASS_NULL: + vkd3d_unreachable(); + } + type = next_type; +@@ -922,6 +935,7 @@ static const char * get_case_insensitive_typename(const char *name) + "texture", + "vector", + "vertexshader", ++ 
"string", + }; + unsigned int i; + +@@ -1019,6 +1033,8 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + return 1; + + case HLSL_CLASS_EFFECT_GROUP: +@@ -1110,6 +1126,8 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + return true; + } + +@@ -1459,7 +1477,7 @@ struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *t + { + struct hlsl_ir_constant *c; + +- VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR); ++ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR || type->class == HLSL_CLASS_NULL); + + if (!(c = hlsl_alloc(ctx, sizeof(*c)))) + return NULL; +@@ -1522,6 +1540,12 @@ struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char * + return &s->node; + } + ++struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_constant_value value = { 0 }; ++ return hlsl_new_constant(ctx, ctx->builtin_types.null, &value, loc); ++} ++ + struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], + struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) +@@ -2562,6 +2586,8 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + break; + } + +@@ -3262,9 +3288,15 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) + vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); + for (k = 0; k < component_count; ++k) + { +- if (k % 4 == 0) ++ bool is_string = var->default_values[k].string; ++ ++ if (k % 4 == 0 || is_string) + vkd3d_string_buffer_printf(&buffer, "\n "); +- vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].value.u); ++ ++ if (is_string) ++ vkd3d_string_buffer_printf(&buffer, " %s", debugstr_a(var->default_values[k].string)); ++ else ++ vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].number.u); + } + vkd3d_string_buffer_printf(&buffer, "\n"); + +@@ -3922,8 +3954,10 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + ctx->builtin_types.sampler[bt] = type; + } + +- ctx->builtin_types.string = hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING); + ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); ++ ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); ++ ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); ++ hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); +@@ -3937,6 +3971,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DomainShader", 
HLSL_CLASS_DOMAIN_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "HullShader", HLSL_CLASS_HULL_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "GeometryShader", HLSL_CLASS_GEOMETRY_SHADER)); ++ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "BlendState", HLSL_CLASS_BLEND_STATE)); + + for (i = 0; i < ARRAY_SIZE(effect_types); ++i) + { +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 7e8cd774ae2..22e25b23988 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -96,7 +96,9 @@ enum hlsl_type_class + HLSL_CLASS_HULL_SHADER, + HLSL_CLASS_GEOMETRY_SHADER, + HLSL_CLASS_CONSTANT_BUFFER, ++ HLSL_CLASS_BLEND_STATE, + HLSL_CLASS_VOID, ++ HLSL_CLASS_NULL, + }; + + enum hlsl_base_type +@@ -408,7 +410,7 @@ struct hlsl_attribute + + /* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a + * starting point of their allocation. They are available through the register(·) and the +- * packoffset(·) syntaxes, respectivelly. ++ * packoffset(·) syntaxes, respectively. + * The constant buffer offset is measured register components. */ + struct hlsl_reg_reservation + { +@@ -454,8 +456,10 @@ struct hlsl_ir_var + * This pointer is NULL for others. */ + struct hlsl_default_value + { ++ /* Default value, in case the component is a string, otherwise it is NULL. */ ++ const char *string; + /* Default value, in case the component is a numeric value. */ +- union hlsl_constant_value_component value; ++ union hlsl_constant_value_component number; + } *default_values; + + /* A dynamic array containing the state block on the variable's declaration, if any. +@@ -998,6 +1002,7 @@ struct hlsl_ctx + struct hlsl_type *sampler[HLSL_SAMPLER_DIM_LAST_SAMPLER + 1]; + struct hlsl_type *string; + struct hlsl_type *Void; ++ struct hlsl_type *null; + } builtin_types; + + /* List of the instruction nodes for initializing static variables. 
*/ +@@ -1450,6 +1455,7 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim + struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); + struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, + const struct vkd3d_shader_location *loc); ++struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc); + struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, + const struct vkd3d_shader_location *loc); + struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +index b4db142f6c2..0c02b27817e 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l +@@ -107,6 +107,7 @@ matrix {return KW_MATRIX; } + namespace {return KW_NAMESPACE; } + nointerpolation {return KW_NOINTERPOLATION; } + noperspective {return KW_NOPERSPECTIVE; } ++NULL {return KW_NULL; } + out {return KW_OUT; } + packoffset {return KW_PACKOFFSET; } + pass {return KW_PASS; } +@@ -144,6 +145,7 @@ stateblock {return KW_STATEBLOCK; } + stateblock_state {return KW_STATEBLOCK_STATE; } + static {return KW_STATIC; } + string {return KW_STRING; } ++String {return KW_STRING; } + struct {return KW_STRUCT; } + switch {return KW_SWITCH; } + tbuffer {return KW_TBUFFER; } +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +index 30bd53d0c49..3f319dea0d8 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y +@@ -304,6 +304,26 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ + } + } + ++ if (src->class == HLSL_CLASS_NULL) ++ { ++ switch (dst->class) ++ { ++ case HLSL_CLASS_DEPTH_STENCIL_STATE: ++ case HLSL_CLASS_DEPTH_STENCIL_VIEW: ++ case HLSL_CLASS_PIXEL_SHADER: ++ case HLSL_CLASS_RASTERIZER_STATE: ++ case HLSL_CLASS_RENDER_TARGET_VIEW: ++ case HLSL_CLASS_SAMPLER: ++ case HLSL_CLASS_STRING: ++ case HLSL_CLASS_TEXTURE: ++ case HLSL_CLASS_UAV: ++ case HLSL_CLASS_VERTEX_SHADER: ++ return true; ++ default: ++ break; ++ } ++ } ++ + return hlsl_types_are_componentwise_equal(ctx, src, dst); + } + +@@ -331,6 +351,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl + if (hlsl_types_are_equal(src_type, dst_type)) + return node; + ++ if (src_type->class == HLSL_CLASS_NULL) ++ return node; ++ + if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) + { + unsigned int src_comp_count = hlsl_type_component_count(src_type); +@@ -575,11 +598,10 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); + } + +-static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, ++static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) + { +- union hlsl_constant_value_component ret = {0}; +- struct hlsl_ir_constant *constant; ++ struct hlsl_default_value ret = {0}; + struct hlsl_ir_node *node; + struct hlsl_block expr; + struct hlsl_src src; +@@ -631,8 +653,16 @@ static union hlsl_constant_value_component evaluate_static_expression(struct hls + + if (node->type == 
HLSL_IR_CONSTANT) + { +- constant = hlsl_ir_constant(node); +- ret = constant->value.u[0]; ++ struct hlsl_ir_constant *constant = hlsl_ir_constant(node); ++ ++ ret.number = constant->value.u[0]; ++ } ++ else if (node->type == HLSL_IR_STRING_CONSTANT) ++ { ++ struct hlsl_ir_string_constant *string = hlsl_ir_string_constant(node); ++ ++ if (!(ret.string = vkd3d_strdup(string->string))) ++ return ret; + } + else if (node->type == HLSL_IR_STRING_CONSTANT) + { +@@ -652,10 +682,11 @@ static union hlsl_constant_value_component evaluate_static_expression(struct hls + static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) + { +- union hlsl_constant_value_component res; ++ struct hlsl_default_value res; + + res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); +- return res.u; ++ VKD3D_ASSERT(!res.string); ++ return res.number.u; + } + + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, +@@ -1868,49 +1899,51 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls + return add_expr(ctx, instrs, op, args, ret_type, loc); + } + +-static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, +- struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) ++static struct hlsl_ir_node *add_binary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, ++ struct hlsl_ir_node *lhs, struct hlsl_ir_node *rhs, const struct vkd3d_shader_location *loc) + { +- struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); +- +- hlsl_block_add_block(block1, block2); +- destroy_block(block2); +- + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_DIV: + case HLSL_OP2_MOD: + case HLSL_OP2_MUL: +- add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); +- break; ++ return add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, loc); + + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: +- add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); +- break; ++ return add_binary_bitwise_expr(ctx, block, op, lhs, rhs, loc); + + case HLSL_OP2_LESS: + case HLSL_OP2_GEQUAL: + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: +- add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); +- break; ++ return add_binary_comparison_expr(ctx, block, op, lhs, rhs, loc); + + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: +- add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); +- break; ++ return add_binary_logical_expr(ctx, block, op, lhs, rhs, loc); + + case HLSL_OP2_LSHIFT: + case HLSL_OP2_RSHIFT: +- add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); +- break; ++ return add_binary_shift_expr(ctx, block, op, lhs, rhs, loc); + + default: + vkd3d_unreachable(); + } ++} ++ ++static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, ++ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) ++{ ++ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); ++ ++ hlsl_block_add_block(block1, block2); ++ destroy_block(block2); ++ ++ if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) ++ return NULL; + + return block1; + } +@@ -2034,7 +2067,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo + enum hlsl_ir_expr_op op = op_from_assignment(assign_op); + + 
VKD3D_ASSERT(op); +- if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) ++ if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) + return NULL; + } + +@@ -2350,7 +2383,7 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i + + if (!hlsl_clone_block(ctx, &block, instrs)) + return; +- default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); ++ default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); + + if (dst->is_param) + dst_index = *store_index; +@@ -2908,14 +2941,17 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu + struct hlsl_ir_node *comp; + struct hlsl_block store_block; + +- value.u[0] = param->default_values[j].value; +- if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) +- return false; +- hlsl_block_add_instr(args->instrs, comp); ++ if (!param->default_values[j].string) ++ { ++ value.u[0] = param->default_values[j].number; ++ if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) ++ return false; ++ hlsl_block_add_instr(args->instrs, comp); + +- if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) +- return false; +- hlsl_block_add_block(args->instrs, &store_block); ++ if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) ++ return false; ++ hlsl_block_add_block(args->instrs, &store_block); ++ } + } + } + +@@ -6050,6 +6086,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h + %token KW_NAMESPACE + %token KW_NOINTERPOLATION + %token KW_NOPERSPECTIVE ++%token KW_NULL + %token KW_OUT + %token KW_PACKOFFSET + %token KW_PASS +@@ -7304,12 +7341,6 @@ type_no_void: + { + validate_texture_format_type(ctx, $3, &@3); + +- if (hlsl_version_lt(ctx, 4, 1)) +- { +- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +- "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); +- } +- + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); + } + | texture_ms_type '<' type ',' shift_expr '>' +@@ -7433,6 +7464,10 @@ type_no_void: + { + $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); + } ++ | KW_BLENDSTATE ++ { ++ $$ = hlsl_get_type(ctx->cur_scope, "BlendState", true, true); ++ } + + type: + type_no_void +@@ -8323,6 +8358,18 @@ primary_expr: + YYABORT; + } + } ++ | KW_NULL ++ { ++ struct hlsl_ir_node *c; ++ ++ if (!(c = hlsl_new_null_constant(ctx, &@1))) ++ YYABORT; ++ if (!($$ = make_block(ctx, c))) ++ { ++ hlsl_free_instr(c); ++ YYABORT; ++ } ++ } + | VAR_IDENTIFIER + { + struct hlsl_ir_load *load; +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 049461cdb7d..a695eefabf6 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -1648,6 +1648,8 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + break; + + case HLSL_CLASS_MATRIX: +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index c1b8582af6d..6dbe30b1553 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -3831,11 +3831,16 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) + { + struct vsir_block *block2 = &cfg->blocks[j]; + +- if (block2->label == 0) ++ if 
(block2->label == 0 || !vsir_block_dominates(block, block2)) + continue; + +- if (vsir_block_dominates(block, block2)) +- vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); ++ if (cfg->debug_buffer.content_size > 512) ++ { ++ TRACE("%s...\n", cfg->debug_buffer.buffer); ++ vkd3d_string_buffer_clear(&cfg->debug_buffer); ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates: ...", block->label); ++ } ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); + } + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); +@@ -3927,7 +3932,16 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); + + for (k = 0; k < loop->count; ++k) ++ { ++ if (cfg->debug_buffer.content_size > 512) ++ { ++ TRACE("%s...\n", cfg->debug_buffer.buffer); ++ vkd3d_string_buffer_clear(&cfg->debug_buffer); ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop: ...", ++ block->label, header->label); ++ } + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); ++ } + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); +@@ -4150,7 +4164,15 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); + + for (i = 0; i < cfg->order.count; ++i) ++ { ++ if (cfg->debug_buffer.content_size > 512) ++ { ++ TRACE("%s...\n", cfg->debug_buffer.buffer); ++ vkd3d_string_buffer_clear(&cfg->debug_buffer); ++ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order: ..."); ++ } + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); ++ } + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); +@@ -4204,7 +4226,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ + ACTION_EXTEND, + } action = ACTION_CREATE_NEW; + +- /* We've already contructed loop intervals for the back ++ /* We've already constructed loop intervals for the back + * edges, there's nothing more to do. */ + if (vsir_block_dominates(successor, block)) + continue; +@@ -4462,7 +4484,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) + VKD3D_ASSERT(inner_loop->type == STRUCTURE_TYPE_LOOP); + + /* Otherwise, if one of the branches is +- * continueing the inner loop we're inside, ++ * continue-ing the inner loop we're inside, + * make sure it's the false branch (because it + * will be optimized out later). */ + if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) +@@ -5104,14 +5126,14 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, + struct vsir_cfg_emit_target *target = cfg->target; + const struct vkd3d_shader_location no_loc = {0}; + /* Encode the jump target as the loop index plus a bit to remember whether +- * we're breaking or continueing. */ ++ * we're breaking or continue-ing. */ + unsigned int jump_target = jump->target << 1; + enum vkd3d_shader_opcode opcode; + + switch (jump->type) + { + case JUMP_CONTINUE: +- /* If we're continueing the loop we're directly inside, then we can emit a ++ /* If we're continue-ing the loop we're directly inside, then we can emit a + * `continue'. 
Otherwise we first have to break all the loops between here + * and the loop to continue, recording our intention to continue + * in the lowest bit of jump_target. */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index d6d5bbc1c07..84f641cc316 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -22,6 +22,7 @@ + */ + + #include "hlsl.h" ++#include "vkd3d_shader_private.h" + + #define SM4_MAX_SRC_COUNT 6 + #define SM4_MAX_DST_COUNT 2 +@@ -3006,6 +3007,8 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: ++ case HLSL_CLASS_BLEND_STATE: ++ case HLSL_CLASS_NULL: + break; + } + vkd3d_unreachable(); +@@ -3107,8 +3110,6 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) + { + switch (type->class) + { +- case HLSL_CLASS_ARRAY: +- return sm4_resource_type(type->e.array.type); + case HLSL_CLASS_SAMPLER: + return D3D_SIT_SAMPLER; + case HLSL_CLASS_TEXTURE: +@@ -3124,9 +3125,6 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) + + static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) + { +- if (type->class == HLSL_CLASS_ARRAY) +- return sm4_resource_format(type->e.array.type); +- + switch (type->e.resource.format->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: +@@ -3151,9 +3149,6 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type + + static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) + { +- if (type->class == HLSL_CLASS_ARRAY) +- return sm4_rdef_resource_dimension(type->e.array.type); +- + switch (type->sampler_dim) + { + case HLSL_SAMPLER_DIM_1D: +@@ -3190,11 +3185,18 @@ struct extern_resource + const struct hlsl_buffer *buffer; + + char *name; +- struct hlsl_type *data_type; + bool is_user_packed; + ++ /* The data type of a single component of the resource. ++ * This might be different from the data type of the resource itself in 4.0 ++ * profiles, where an array (or multi-dimensional array) is handled as a ++ * single resource, unlike in 5.0. */ ++ struct hlsl_type *component_type; ++ + enum hlsl_regset regset; + unsigned int id, space, index, bind_count; ++ ++ struct vkd3d_shader_location loc; + }; + + static int sm4_compare_extern_resources(const void *a, const void *b) +@@ -3289,14 +3291,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; +- extern_resources[*count].data_type = component_type; + extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + ++ extern_resources[*count].component_type = component_type; ++ + extern_resources[*count].regset = regset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].space = var->regs[regset].space; + extern_resources[*count].index = var->regs[regset].index + regset_offset; + extern_resources[*count].bind_count = 1; ++ extern_resources[*count].loc = var->loc; + + ++*count; + } +@@ -3333,17 +3337,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + extern_resources[*count].buffer = NULL; + + extern_resources[*count].name = name; +- extern_resources[*count].data_type = var->data_type; + /* For some reason 5.1 resources aren't marked as + * user-packed, but cbuffers still are. 
*/ + extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) + && !!var->reg_reservation.reg_type; + ++ extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); ++ + extern_resources[*count].regset = r; + extern_resources[*count].id = var->regs[r].id; + extern_resources[*count].space = var->regs[r].space; + extern_resources[*count].index = var->regs[r].index; + extern_resources[*count].bind_count = var->bind_count[r]; ++ extern_resources[*count].loc = var->loc; + + ++*count; + } +@@ -3374,14 +3380,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un + extern_resources[*count].buffer = buffer; + + extern_resources[*count].name = name; +- extern_resources[*count].data_type = NULL; + extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; + ++ extern_resources[*count].component_type = NULL; ++ + extern_resources[*count].regset = HLSL_REGSET_NUMERIC; + extern_resources[*count].id = buffer->reg.id; + extern_resources[*count].space = buffer->reg.space; + extern_resources[*count].index = buffer->reg.index; + extern_resources[*count].bind_count = 1; ++ extern_resources[*count].loc = buffer->loc; + + ++*count; + } +@@ -3458,13 +3466,13 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + if (resource->buffer) + put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); + else +- put_u32(&buffer, sm4_resource_type(resource->data_type)); ++ put_u32(&buffer, sm4_resource_type(resource->component_type)); + if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) + { +- unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; ++ unsigned int dimx = resource->component_type->e.resource.format->dimx; + +- put_u32(&buffer, sm4_resource_format(resource->data_type)); +- put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); ++ put_u32(&buffer, sm4_resource_format(resource->component_type)); ++ put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); + put_u32(&buffer, ~0u); /* FIXME: multisample count */ + flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; + } +@@ -3593,6 +3601,13 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + unsigned int comp_offset; + enum hlsl_regset regset; + ++ if (comp_type->class == HLSL_CLASS_STRING) ++ { ++ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Cannot write string default value."); ++ continue; ++ } ++ + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); + if (regset == HLSL_REGSET_NUMERIC) + { +@@ -3600,7 +3615,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + hlsl_fixme(ctx, &var->loc, "Write double default values."); + + set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), +- var->default_values[k].value.u); ++ var->default_values[k].number.u); + } + } + } +@@ -4269,7 +4284,6 @@ static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const st + + static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) + { +- struct hlsl_type *component_type; + unsigned int i; + struct sm4_instruction instr = + { +@@ -4279,13 +4293,11 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex + .dst_count = 1, + }; + +- component_type = hlsl_type_get_component_type(tpf->ctx, 
resource->data_type, 0); ++ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); + +- if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) ++ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; + +- VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); +- + for (i = 0; i < resource->bind_count; ++i) + { + if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) +@@ -4317,11 +4329,12 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; + struct hlsl_type *component_type; + struct sm4_instruction instr; ++ bool multisampled; + unsigned int i; + + VKD3D_ASSERT(resource->regset == regset); + +- component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); ++ component_type = resource->component_type; + + for (i = 0; i < resource->bind_count; ++i) + { +@@ -4339,6 +4352,16 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + .idx_count = 1, + }; + ++ multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS ++ || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; ++ ++ if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) ++ { ++ hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, ++ "Multisampled texture object declaration needs sample count for profile %s.", ++ tpf->ctx->profile->name); ++ } ++ + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + VKD3D_ASSERT(!i); +@@ -4358,18 +4381,18 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + + if (uav) + { +- switch (resource->data_type->sampler_dim) ++ switch (component_type->sampler_dim) + { +- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: +- instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; +- instr.byte_stride = resource->data_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; +- break; +- default: +- instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; +- break; ++ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: ++ instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; ++ instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; ++ break; ++ default: ++ instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; ++ break; + } + +- if (resource->data_type->e.resource.rasteriser_ordered) ++ if (component_type->e.resource.rasteriser_ordered) + instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT; + } + else +@@ -4378,11 +4401,8 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex + } + instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); + +- if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS +- || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) +- { ++ if (multisampled) + instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; +- } + + write_sm4_instruction(tpf, &instr); + } +@@ -6082,7 +6102,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) + extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); + for (unsigned int i = 0; i < extern_resources_count; ++i) + { +- if (extern_resources[i].data_type && extern_resources[i].data_type->e.resource.rasteriser_ordered) ++ if (extern_resources[i].component_type && 
extern_resources[i].component_type->e.resource.rasteriser_ordered) + *flags |= VKD3D_SM4_REQUIRES_ROVS; + } + sm4_free_extern_resources(extern_resources, extern_resources_count); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 13b4dab76d1..ef66a8ca07a 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -51,7 +51,6 @@ + #include "vkd3d_shader.h" + #include "wine/list.h" + +-#include + #include + #include + #include +diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c +index ac29088b9cb..6d6820d3752 100644 +--- a/libs/vkd3d/libs/vkd3d/resource.c ++++ b/libs/vkd3d/libs/vkd3d/resource.c +@@ -2184,7 +2184,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, + goto allocate_memory; + } + +- /* Syncronisation is not required for binding, but vkMapMemory() may be called ++ /* Synchronisation is not required for binding, but vkMapMemory() may be called + * from another thread and it requires exclusive access. */ + vkd3d_mutex_lock(&heap->mutex); + +diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c +index 0bdb7ea524d..519d1a2d85f 100644 +--- a/libs/vkd3d/libs/vkd3d/state.c ++++ b/libs/vkd3d/libs/vkd3d/state.c +@@ -635,14 +635,18 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat + for (i = 0; i < desc->NumParameters; ++i) + { + const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; ++ D3D12_SHADER_VISIBILITY visibility; ++ + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) + continue; + +- VKD3D_ASSERT(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); +- push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL +- : stage_flags_from_visibility(p->ShaderVisibility); +- push_constants[p->ShaderVisibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); ++ visibility = use_vk_heaps ? D3D12_SHADER_VISIBILITY_ALL : p->ShaderVisibility; ++ VKD3D_ASSERT(visibility <= D3D12_SHADER_VISIBILITY_PIXEL); ++ ++ push_constants[visibility].stageFlags = stage_flags_from_visibility(visibility); ++ push_constants[visibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); + } ++ + if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) + { + /* When D3D12_SHADER_VISIBILITY_ALL is used we use a single push +diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +index a4bd2202f39..ba4e2e8488d 100644 +--- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h ++++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h +@@ -37,7 +37,6 @@ + #include "vkd3d.h" + #include "vkd3d_shader.h" + +-#include + #include + #include + #include +@@ -679,7 +678,7 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * + void *view; + + /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors +- * from multiple threads without syncronisation. This is apparently valid in Windows. */ ++ * from multiple threads without synchronisation. This is apparently valid in Windows. 
*/ + for (;;) + { + do +-- +2.43.0 + diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-0202393d41f00d8c9f20f59ec080b833b54.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-0202393d41f00d8c9f20f59ec080b833b54.patch deleted file mode 100644 index df088144..00000000 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-0202393d41f00d8c9f20f59ec080b833b54.patch +++ /dev/null @@ -1,398 +0,0 @@ -From fc4be1e672aa26b5eb27d1bfb280c23189664cb1 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Wed, 17 Jul 2024 08:43:16 +1000 -Subject: [PATCH] Updated vkd3d to 0202393d41f00d8c9f20f59ec080b833b5436f5a. - ---- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 20 +++ - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 1 + - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 5 +- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 174 +++++++++++++++++++- - libs/vkd3d/libs/vkd3d-shader/ir.c | 57 +++++++ - 5 files changed, 248 insertions(+), 9 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index abfbd461b33..492ad9b69fb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -2371,6 +2371,17 @@ static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - } - } - -+static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src) -+{ -+ if (op == HLSL_OP1_COS_REDUCED) -+ assert(dst->writemask == VKD3DSP_WRITEMASK_0); -+ else /* HLSL_OP1_SIN_REDUCED */ -+ assert(dst->writemask == VKD3DSP_WRITEMASK_1); -+ -+ d3dbc_write_unary_op(d3dbc, D3DSIO_SINCOS, dst, src, 0, 0); -+} -+ - static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -@@ -2439,6 +2450,11 @@ static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ - d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); - break; - -+ case HLSL_OP1_COS_REDUCED: -+ case HLSL_OP1_SIN_REDUCED: -+ d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); -+ break; -+ - case HLSL_OP2_ADD: - d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); - break; -@@ -2499,6 +2515,10 @@ static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ - d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - -+ case HLSL_OP3_MAD: -+ d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ break; -+ - default: - hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op)); - break; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index acf50869a40..1526d7b02a9 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -2849,6 +2849,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP3_CMP] = "cmp", - [HLSL_OP3_DP2ADD] = "dp2add", - [HLSL_OP3_TERNARY] = "ternary", -+ [HLSL_OP3_MAD] = "mad", - }; - - return op_names[op]; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 5832958712a..4411546e269 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -645,7 +645,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_CAST, - HLSL_OP1_CEIL, - HLSL_OP1_COS, -- HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ -+ HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], 
writes to .x */ - HLSL_OP1_DSX, - HLSL_OP1_DSX_COARSE, - HLSL_OP1_DSX_FINE, -@@ -666,7 +666,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_SAT, - HLSL_OP1_SIGN, - HLSL_OP1_SIN, -- HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ -+ HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ - HLSL_OP1_SQRT, - HLSL_OP1_TRUNC, - -@@ -699,6 +699,7 @@ enum hlsl_ir_expr_op - * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ - HLSL_OP3_CMP, - HLSL_OP3_TERNARY, -+ HLSL_OP3_MAD, - }; - - #define HLSL_MAX_OPERANDS 3 -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 02884df9d76..26386c0b8df 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -20,6 +20,7 @@ - - #include "hlsl.h" - #include -+#include - - /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ - static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -3016,6 +3017,108 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return true; - } - -+/* Lower SIN/COS to SINCOS for SM1. */ -+static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi; -+ struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value; -+ struct hlsl_ir_node *mad, *frc, *reduced; -+ struct hlsl_type *type; -+ struct hlsl_ir_expr *expr; -+ enum hlsl_ir_expr_op op; -+ struct hlsl_ir_node *sincos; -+ int i; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ -+ if (expr->op == HLSL_OP1_SIN) -+ op = HLSL_OP1_SIN_REDUCED; -+ else if (expr->op == HLSL_OP1_COS) -+ op = HLSL_OP1_COS_REDUCED; -+ else -+ return false; -+ -+ arg = expr->operands[0].node; -+ type = arg->data_type; -+ -+ /* Reduce the range of the input angles to [-pi, pi]. 
*/ -+ for (i = 0; i < type->dimx; ++i) -+ { -+ half_value.u[i].f = 0.5; -+ two_pi_value.u[i].f = 2.0 * M_PI; -+ reciprocal_two_pi_value.u[i].f = 1.0 / (2.0 * M_PI); -+ neg_pi_value.u[i].f = -M_PI; -+ } -+ -+ if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc)) -+ || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc)) -+ || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc)) -+ || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, half); -+ hlsl_block_add_instr(block, two_pi); -+ hlsl_block_add_instr(block, reciprocal_two_pi); -+ hlsl_block_add_instr(block, neg_pi); -+ -+ if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half))) -+ return false; -+ hlsl_block_add_instr(block, mad); -+ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mad, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, frc); -+ if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi))) -+ return false; -+ hlsl_block_add_instr(block, reduced); -+ -+ if (type->dimx == 1) -+ { -+ if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, sincos); -+ } -+ else -+ { -+ struct hlsl_ir_node *comps[4] = {0}; -+ struct hlsl_ir_var *var; -+ struct hlsl_deref var_deref; -+ struct hlsl_ir_load *var_load; -+ -+ for (i = 0; i < type->dimx; ++i) -+ { -+ uint32_t s = hlsl_swizzle_from_writemask(1 << i); -+ -+ if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, comps[i]); -+ } -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) -+ return false; -+ hlsl_init_simple_deref_from_var(&var_deref, var); -+ -+ for (i = 0; i < type->dimx; ++i) -+ { -+ struct hlsl_block store_block; -+ -+ if (!(sincos = hlsl_new_unary_expr(ctx, op, comps[i], &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, sincos); -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, sincos)) -+ return false; -+ hlsl_block_add_block(block, &store_block); -+ } -+ -+ if (!(var_load = hlsl_new_load_index(ctx, &var_deref, NULL, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, &var_load->node); -+ } -+ -+ return true; -+} -+ - static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; -@@ -4230,6 +4333,30 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - return ret; - } - -+/* Allocate a register with writemask, while reserving reg_writemask. 
*/ -+static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, -+ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) -+{ -+ struct hlsl_reg ret = {0}; -+ uint32_t reg_idx; -+ -+ assert((reg_writemask & writemask) == writemask); -+ -+ for (reg_idx = 0;; ++reg_idx) -+ { -+ if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) -+ break; -+ } -+ -+ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); -+ -+ ret.id = reg_idx; -+ ret.allocation_size = 1; -+ ret.writemask = writemask; -+ ret.allocated = true; -+ return ret; -+} -+ - static bool is_range_available(const struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) - { -@@ -4433,6 +4560,44 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - } - } - -+static void allocate_instr_temp_register(struct hlsl_ctx *ctx, -+ struct hlsl_ir_node *instr, struct register_allocator *allocator) -+{ -+ unsigned int reg_writemask = 0, dst_writemask = 0; -+ -+ if (instr->reg.allocated || !instr->last_read) -+ return; -+ -+ if (instr->type == HLSL_IR_EXPR) -+ { -+ switch (hlsl_ir_expr(instr)->op) -+ { -+ case HLSL_OP1_COS_REDUCED: -+ dst_writemask = VKD3DSP_WRITEMASK_0; -+ reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_0; -+ break; -+ -+ case HLSL_OP1_SIN_REDUCED: -+ dst_writemask = VKD3DSP_WRITEMASK_1; -+ reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_1; -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+ if (reg_writemask) -+ instr->reg = allocate_register_with_masks(ctx, allocator, -+ instr->index, instr->last_read, reg_writemask, dst_writemask); -+ else -+ instr->reg = allocate_numeric_registers_for_type(ctx, allocator, -+ instr->index, instr->last_read, instr->data_type); -+ -+ TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, -+ debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -+} -+ - static void allocate_variable_temp_register(struct hlsl_ctx *ctx, - struct hlsl_ir_var *var, struct register_allocator *allocator) - { -@@ -4472,13 +4637,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) - continue; - -- if (!instr->reg.allocated && instr->last_read) -- { -- instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, -- instr->data_type); -- TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, -- debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -- } -+ allocate_instr_temp_register(ctx, instr, allocator); - - switch (instr->type) - { -@@ -6050,6 +6209,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_round, body); - lower_ir(ctx, lower_ceil, body); - lower_ir(ctx, lower_floor, body); -+ lower_ir(ctx, lower_trig, body); - lower_ir(ctx, lower_comparison_operators, body); - lower_ir(ctx, lower_logic_not, body); - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index be9e4219d6a..9202c77cadb 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -377,6 +377,58 @@ static 
enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *program, -+ struct vkd3d_shader_instruction *sincos) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = sincos - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int s; -+ -+ if (sincos->dst_count != 1) -+ return VKD3D_OK; -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &instructions->elements[pos + 1]; -+ -+ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SINCOS, 2, 1))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins->flags = sincos->flags; -+ -+ *ins->src = *sincos->src; -+ /* Set the source swizzle to replicate the first component. */ -+ s = vsir_swizzle_get_component(sincos->src->swizzle, 0); -+ ins->src->swizzle = vkd3d_shader_create_swizzle(s, s, s, s); -+ -+ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_1) -+ { -+ ins->dst[0] = *sincos->dst; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_1; -+ } -+ else -+ { -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); -+ } -+ -+ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_0) -+ { -+ ins->dst[1] = *sincos->dst; -+ ins->dst[1].write_mask = VKD3DSP_WRITEMASK_0; -+ } -+ else -+ { -+ vsir_dst_param_init(&ins->dst[1], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); -+ } -+ -+ /* Make the original instruction no-op */ -+ vkd3d_shader_instruction_make_nop(sincos); -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) - { -@@ -410,6 +462,11 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - vkd3d_shader_instruction_make_nop(ins); - break; - -+ case VKD3DSIH_SINCOS: -+ if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) -+ return ret; -+ break; -+ - default: - break; - } --- -2.43.0 - diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-7eb63a7c0d23a83bbdfcfa5ed83b9434370.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-7eb63a7c0d23a83bbdfcfa5ed83b9434370.patch deleted file mode 100644 index 3e9649af..00000000 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-7eb63a7c0d23a83bbdfcfa5ed83b9434370.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 30a352b7adc39f9e6c120e90610663f253475b71 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 18 Jul 2024 10:01:10 +1000 -Subject: [PATCH] Updated vkd3d to 7eb63a7c0d23a83bbdfcfa5ed83b943437051138. 
- ---- - libs/vkd3d/libs/vkd3d/command.c | 58 ++++++++++++++++----------------- - libs/vkd3d/libs/vkd3d/device.c | 4 ++- - 2 files changed, 32 insertions(+), 30 deletions(-) - -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 2354938c08d..a484da94092 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -2977,30 +2977,20 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - enum vkd3d_pipeline_bind_point bind_point) - { - struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; -+ VkWriteDescriptorSet descriptor_writes[ARRAY_SIZE(bindings->push_descriptors)] = {0}; -+ VkDescriptorBufferInfo buffer_infos[ARRAY_SIZE(bindings->push_descriptors)] = {0}; - const struct d3d12_root_signature *root_signature = bindings->root_signature; -- VkWriteDescriptorSet *descriptor_writes = NULL, *current_descriptor_write; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; -- VkDescriptorBufferInfo *buffer_infos = NULL, *current_buffer_info; - const struct d3d12_root_parameter *root_parameter; - struct vkd3d_push_descriptor *push_descriptor; - struct d3d12_device *device = list->device; - VkDescriptorBufferInfo *vk_buffer_info; -- unsigned int i, descriptor_count; -+ unsigned int i, descriptor_count = 0; - VkBufferView *vk_buffer_view; - - if (!bindings->push_descriptor_dirty_mask) - return; - -- descriptor_count = vkd3d_popcount(bindings->push_descriptor_dirty_mask); -- -- if (!(descriptor_writes = vkd3d_calloc(descriptor_count, sizeof(*descriptor_writes)))) -- return; -- if (!(buffer_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_infos)))) -- goto done; -- -- descriptor_count = 0; -- current_buffer_info = buffer_infos; -- current_descriptor_write = descriptor_writes; - for (i = 0; i < ARRAY_SIZE(bindings->push_descriptors); ++i) - { - if (!(bindings->push_descriptor_dirty_mask & (1u << i))) -@@ -3012,7 +3002,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - if (root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) - { - vk_buffer_view = NULL; -- vk_buffer_info = current_buffer_info; -+ vk_buffer_info = &buffer_infos[descriptor_count]; - vk_buffer_info->buffer = push_descriptor->u.cbv.vk_buffer; - vk_buffer_info->offset = push_descriptor->u.cbv.offset; - vk_buffer_info->range = VK_WHOLE_SIZE; -@@ -3023,21 +3013,15 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - vk_buffer_info = NULL; - } - -- if (!vk_write_descriptor_set_from_root_descriptor(current_descriptor_write, -+ if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], - root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) - continue; - - ++descriptor_count; -- ++current_descriptor_write; -- ++current_buffer_info; - } - - VK_CALL(vkUpdateDescriptorSets(device->vk_device, descriptor_count, descriptor_writes, 0, NULL)); - bindings->push_descriptor_dirty_mask = 0; -- --done: -- vkd3d_free(descriptor_writes); -- vkd3d_free(buffer_infos); - } - - static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_command_list *list, -@@ -5289,11 +5273,13 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, - unsigned int rect_count, const D3D12_RECT *rects) - { -+ const VkPhysicalDeviceLimits *device_limits = 
&list->device->vk_info.device_limits; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - unsigned int i, miplevel_idx, layer_count; - struct vkd3d_uav_clear_pipeline pipeline; - struct vkd3d_uav_clear_args clear_args; - const struct vkd3d_resource_view *view; -+ uint32_t count_x, count_y, count_z; - VkDescriptorImageInfo image_info; - D3D12_RECT full_rect, curr_rect; - VkWriteDescriptorSet write_set; -@@ -5384,18 +5370,32 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) - continue; - -- clear_args.offset.x = curr_rect.left; - clear_args.offset.y = curr_rect.top; -- clear_args.extent.width = curr_rect.right - curr_rect.left; - clear_args.extent.height = curr_rect.bottom - curr_rect.top; - -- VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, -- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); -+ count_y = vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height); -+ count_z = vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth); -+ if (count_y > device_limits->maxComputeWorkGroupCount[1]) -+ FIXME("Group Y count %u exceeds max %u.\n", count_y, device_limits->maxComputeWorkGroupCount[1]); -+ if (count_z > device_limits->maxComputeWorkGroupCount[2]) -+ FIXME("Group Z count %u exceeds max %u.\n", count_z, device_limits->maxComputeWorkGroupCount[2]); - -- VK_CALL(vkCmdDispatch(list->vk_command_buffer, -- vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width), -- vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height), -- vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth))); -+ do -+ { -+ clear_args.offset.x = curr_rect.left; -+ clear_args.extent.width = curr_rect.right - curr_rect.left; -+ -+ count_x = vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width); -+ count_x = min(count_x, device_limits->maxComputeWorkGroupCount[0]); -+ -+ VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, -+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); -+ -+ VK_CALL(vkCmdDispatch(list->vk_command_buffer, count_x, count_y, count_z)); -+ -+ curr_rect.left += count_x * pipeline.group_size.width; -+ } -+ while (curr_rect.right > curr_rect.left); - } - } - -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index 2bbc170504e..ff3e41e6b70 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -2563,7 +2563,9 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device) - VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; -- device->vk_pool_count = 2; -+ pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; -+ pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); -+ device->vk_pool_count = 3; - return; - } - --- -2.43.0 - diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-947b937a1afc0f1d57b11883dad9ffb3fbd.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-947b937a1afc0f1d57b11883dad9ffb3fbd.patch deleted file mode 100644 index 1ccd74a4..00000000 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-947b937a1afc0f1d57b11883dad9ffb3fbd.patch +++ /dev/null @@ -1,1116 +0,0 @@ -From cc7fdb8f0081c445a375aa2758199b29f1559e1c Mon Sep 
17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 26 Jul 2024 09:32:27 +1000 -Subject: [PATCH] Updated vkd3d to 947b937a1afc0f1d57b11883dad9ffb3fbdf6380. - ---- - libs/vkd3d/include/vkd3d_shader.h | 24 +++ - libs/vkd3d/libs/vkd3d-shader/fx.c | 16 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 163 ++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 73 ++++++-- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 77 ++++----- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 2 + - libs/vkd3d/libs/vkd3d/device.c | 146 +++++++--------- - libs/vkd3d/libs/vkd3d/state.c | 56 +++--- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 1 - - 9 files changed, 367 insertions(+), 191 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 4acb622468a..d4756810065 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -553,6 +553,30 @@ enum vkd3d_shader_parameter_name - * \since 1.13 - */ - VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, -+ /** -+ * Whether to use flat interpolation for fragment shader colour inputs. -+ * If the value is nonzero, inputs whose semantic usage is COLOR will use -+ * flat interpolation instead of linear. -+ * This parameter is ignored if the shader model is 4 or greater, since only -+ * shader model 3 and below do not specify the interpolation mode in the -+ * shader bytecode. -+ * -+ * This parameter can be used to implement fixed function shade mode, as -+ * present in Direct3D versions up to 9, if the target environment does not -+ * support shade mode as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * The default value is zero, i.e. use linear interpolation. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index bd2ad1290cd..15a518c07db 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -189,8 +189,8 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) - - static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -- if (var->state_block_count) -- hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); -+ if (var->state_block_count && var->state_blocks[0]->count) -+ hlsl_fixme(fx->ctx, &var->loc, "Write pass assignments."); - - fx->ops->write_pass(var, fx); - } -@@ -397,6 +397,9 @@ static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx - - /* TODO: annotations */ - /* TODO: assignments */ -+ -+ /* For some reason every pass adds to the total shader object count. 
*/ -+ fx->shader_count++; - } - - static uint32_t get_fx_4_type_size(const struct hlsl_type *type) -@@ -852,6 +855,10 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - case HLSL_CLASS_STRUCT: - put_u32(buffer, type->e.record.field_count); - break; -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_PIXEL_SHADER: -+ fx->shader_count += elements_count; -+ break; - default: - ; - } -@@ -1063,7 +1070,7 @@ static const struct fx_write_context_ops fx_2_ops = - - static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - { -- uint32_t offset, size, technique_count, parameter_count, object_count; -+ uint32_t offset, size, technique_count, shader_count, parameter_count, object_count; - struct vkd3d_bytecode_buffer buffer = { 0 }; - struct vkd3d_bytecode_buffer *structured; - struct fx_write_context fx; -@@ -1080,7 +1087,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - parameter_count = put_u32(structured, 0); /* Parameter count */ - technique_count = put_u32(structured, 0); -- put_u32(structured, 0); /* Unknown */ -+ shader_count = put_u32(structured, 0); - object_count = put_u32(structured, 0); - - write_fx_2_parameters(&fx); -@@ -1089,6 +1096,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - write_techniques(ctx->globals, &fx); - set_u32(structured, technique_count, fx.technique_count); -+ set_u32(structured, shader_count, fx.shader_count); - - put_u32(structured, 0); /* String count */ - put_u32(structured, 0); /* Resource count */ -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 56736a65306..312eaec8a73 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -34,6 +34,14 @@ struct parse_fields - size_t count, capacity; - }; - -+struct parse_initializer -+{ -+ struct hlsl_ir_node **args; -+ unsigned int args_count; -+ struct hlsl_block *instrs; -+ bool braces; -+}; -+ - struct parse_parameter - { - struct hlsl_type *type; -@@ -41,6 +49,7 @@ struct parse_parameter - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - uint32_t modifiers; -+ struct parse_initializer initializer; - }; - - struct parse_colon_attribute -@@ -49,14 +58,6 @@ struct parse_colon_attribute - struct hlsl_reg_reservation reg_reservation; - }; - --struct parse_initializer --{ -- struct hlsl_ir_node **args; -- unsigned int args_count; -- struct hlsl_block *instrs; -- bool braces; --}; -- - struct parse_array_sizes - { - uint32_t *sizes; /* innermost first */ -@@ -73,6 +74,7 @@ struct parse_variable_def - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - struct parse_initializer initializer; -+ struct hlsl_scope *annotations; - - struct hlsl_type *basic_type; - uint32_t modifiers; -@@ -1188,6 +1190,9 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, - return true; - } - -+static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, -+ struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); -+ - static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, - struct parse_parameter *param, const struct vkd3d_shader_location *loc) - { -@@ -1204,11 +1209,52 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() is not allowed on 
function parameters."); - -+ if (parameters->count && parameters->vars[parameters->count - 1]->default_values -+ && !param->initializer.args_count) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -+ "Missing default value for parameter '%s'.", param->name); -+ -+ if (param->initializer.args_count && (param->modifiers & HLSL_STORAGE_OUT)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Output parameter '%s' has a default value.", param->name); -+ - if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, - ¶m->reg_reservation))) - return false; - var->is_param = 1; - -+ if (param->initializer.args_count) -+ { -+ unsigned int component_count = hlsl_type_component_count(param->type); -+ unsigned int store_index = 0; -+ unsigned int size, i; -+ -+ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) -+ return false; -+ -+ if (!param->initializer.braces) -+ { -+ if (!(add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc))) -+ return false; -+ -+ param->initializer.args[0] = node_from_block(param->initializer.instrs); -+ } -+ -+ size = initializer_size(¶m->initializer); -+ if (component_count != size) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", component_count, size); -+ } -+ -+ for (i = 0; i < param->initializer.args_count; ++i) -+ { -+ initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); -+ } -+ -+ free_parse_initializer(¶m->initializer); -+ } -+ - if (!hlsl_add_var(ctx, var, false)) - { - hlsl_free_var(var); -@@ -2226,7 +2272,9 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - /* For some reason, for matrices, values from default value initializers end up in different - * components than from regular initializers. Default value initializers fill the matrix in - * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -- * (top-to-bottom left-to-right), so they have to be adjusted. */ -+ * (top-to-bottom left-to-right), so they have to be adjusted. -+ * An exception is that the order of matrix initializers for function parameters are row-major -+ * (top-to-bottom left-to-right). 
*/ - static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, - struct hlsl_type *type, unsigned int index) - { -@@ -2299,7 +2347,11 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - return; - default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); - -- dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ if (dst->is_param) -+ dst_index = *store_index; -+ else -+ dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ - dst->default_values[dst_index] = default_value; - - hlsl_block_cleanup(&block); -@@ -2498,6 +2550,8 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - return; - } - -+ var->annotations = v->annotations; -+ - if (constant_buffer && ctx->cur_scope == ctx->globals) - { - if (!(var_name = vkd3d_strdup(v->name))) -@@ -2567,6 +2621,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, - "Const variable \"%s\" is missing an initializer.", var->name); - } -+ -+ if (var->annotations) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Annotations are only allowed for objects in the global scope."); -+ } - } - - if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -@@ -2742,14 +2802,18 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, - { - unsigned int i; - -- if (decl->parameters.count != args->args_count) -+ if (decl->parameters.count < args->args_count) - return false; - -- for (i = 0; i < decl->parameters.count; ++i) -+ for (i = 0; i < args->args_count; ++i) - { - if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) - return false; - } -+ -+ if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) -+ return false; -+ - return true; - } - -@@ -2792,11 +2856,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - const struct parse_initializer *args, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *call; -- unsigned int i; -+ unsigned int i, j; - -- assert(args->args_count == func->parameters.count); -+ assert(args->args_count <= func->parameters.count); - -- for (i = 0; i < func->parameters.count; ++i) -+ for (i = 0; i < args->args_count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; - struct hlsl_ir_node *arg = args->args[i]; -@@ -2821,11 +2885,40 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - } - } - -+ /* Add default values for the remaining parameters. 
*/ -+ for (i = args->args_count; i < func->parameters.count; ++i) -+ { -+ struct hlsl_ir_var *param = func->parameters.vars[i]; -+ unsigned int comp_count = hlsl_type_component_count(param->data_type); -+ struct hlsl_deref param_deref; -+ -+ assert(param->default_values); -+ -+ hlsl_init_simple_deref_from_var(¶m_deref, param); -+ -+ for (j = 0; j < comp_count; ++j) -+ { -+ struct hlsl_type *type = hlsl_type_get_component_type(ctx, param->data_type, j); -+ struct hlsl_constant_value value; -+ struct hlsl_ir_node *comp; -+ struct hlsl_block store_block; -+ -+ value.u[0] = param->default_values[j].value; -+ if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, comp); -+ -+ if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) -+ return false; -+ hlsl_block_add_block(args->instrs, &store_block); -+ } -+ } -+ - if (!(call = hlsl_new_call(ctx, func, loc))) - return false; - hlsl_block_add_instr(args->instrs, call); - -- for (i = 0; i < func->parameters.count; ++i) -+ for (i = 0; i < args->args_count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; - struct hlsl_ir_node *arg = args->args[i]; -@@ -3206,6 +3299,29 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - -+static bool intrinsic_asint(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *data_type; -+ -+ data_type = params->args[0]->data_type; -+ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong argument type of asint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", -+ string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_INT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); -+} -+ - static bool intrinsic_asuint(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4732,6 +4848,7 @@ intrinsic_functions[] = - {"any", 1, true, intrinsic_any}, - {"asfloat", 1, true, intrinsic_asfloat}, - {"asin", 1, true, intrinsic_asin}, -+ {"asint", 1, true, intrinsic_asint}, - {"asuint", -1, true, intrinsic_asuint}, - {"atan", 1, true, intrinsic_atan}, - {"atan2", 2, true, intrinsic_atan2}, -@@ -6067,6 +6184,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %type name_opt - - %type parameter -+%type parameter_decl - - %type param_list - %type parameters -@@ -6902,6 +7020,14 @@ param_list: - } - - parameter: -+ parameter_decl -+ | parameter_decl '=' complex_initializer -+ { -+ $$ = $1; -+ $$.initializer = $3; -+ } -+ -+parameter_decl: - var_modifiers type_no_void any_identifier arrays colon_attribute - { - uint32_t modifiers = $1; -@@ -6934,6 +7060,8 @@ parameter: - $$.name = $3; - $$.semantic = $5.semantic; - $$.reg_reservation = $5.reg_reservation; -+ -+ memset(&$$.initializer, 0, sizeof($$.initializer)); - } - - texture_type: -@@ -7370,7 +7498,7 @@ variables_def_typed: - } - - variable_decl: -- any_identifier arrays colon_attribute -+ any_identifier arrays 
colon_attribute annotations_opt - { - $$ = hlsl_alloc(ctx, sizeof(*$$)); - $$->loc = @1; -@@ -7378,6 +7506,7 @@ variable_decl: - $$->arrays = $2; - $$->semantic = $3.semantic; - $$->reg_reservation = $3.reg_reservation; -+ $$->annotations = $4; - } - - state_block_start: -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 9202c77cadb..e0ac6322c71 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -105,6 +105,18 @@ void vsir_program_cleanup(struct vsir_program *program) - shader_signature_cleanup(&program->patch_constant_signature); - } - -+const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( -+ const struct vsir_program *program, enum vkd3d_shader_parameter_name name) -+{ -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ if (program->parameters[i].name == name) -+ return &program->parameters[i]; -+ } -+ -+ return NULL; -+} -+ - static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) - { - return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; -@@ -1711,7 +1723,33 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - } - } - --static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) -+static bool use_flat_interpolation(const struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ static const struct vkd3d_shader_location no_loc; -+ const struct vkd3d_shader_parameter1 *parameter; -+ -+ if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) -+ return false; -+ -+ if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported flat interpolation parameter type %#x.\n", parameter->type); -+ return false; -+ } -+ if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); -+ return false; -+ } -+ -+ return parameter->u.immediate_constant.u.u32; -+} -+ -+static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - struct io_normaliser normaliser = {program->instructions}; - struct vkd3d_shader_instruction *ins; -@@ -1774,6 +1812,18 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL -+ && program->shader_version.major < 4 && use_flat_interpolation(program, message_context)) -+ { -+ for (i = 0; i < program->input_signature.element_count; ++i) -+ { -+ struct signature_element *element = &program->input_signature.elements[i]; -+ -+ if (!ascii_strcasecmp(element->semantic_name, "COLOR")) -+ element->interpolation_mode = VKD3DSIM_CONSTANT; -+ } -+ } -+ - normaliser.phase = VKD3DSIH_INVALID; - for (i = 0; i < normaliser.instructions.count; ++i) - shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); -@@ -5497,10 +5547,8 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr - } - - dst_param_init_ssa_bool(&ins->dst[0], program->ssa_count); -- ins->src[0].reg.dimension = 
VSIR_DIMENSION_VEC4; -- ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -- ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4; -- ins->src[1].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ ins->src[opcodes[compare_func].swap ? 1 : 0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); - - ++ins; - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -@@ -5541,17 +5589,8 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro - || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) - return VKD3D_OK; - -- for (unsigned int i = 0; i < program->parameter_count; ++i) -- { -- const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -- -- if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC) -- func = parameter; -- else if (parameter->name == VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF) -- ref = parameter; -- } -- -- if (!func || !ref) -+ if (!(func = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC)) -+ || !(ref = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF))) - return VKD3D_OK; - - if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -@@ -6590,7 +6629,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - return result; - } - -- if ((result = vsir_program_normalise_io_registers(program)) < 0) -+ if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) - return result; - - if ((result = instruction_array_normalise_flat_constants(program)) < 0) -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 72a6f1e60dc..d66446be0b0 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -2393,6 +2393,7 @@ struct ssa_register_info - struct spirv_compiler - { - struct vkd3d_spirv_builder spirv_builder; -+ const struct vsir_program *program; - - struct vkd3d_shader_message_context *message_context; - struct vkd3d_shader_location location; -@@ -2418,8 +2419,6 @@ struct spirv_compiler - uint32_t *descriptor_offset_ids; - struct vkd3d_push_constant_buffer_binding *push_constants; - const struct vkd3d_shader_spirv_target_info *spirv_target_info; -- const struct vkd3d_shader_parameter1 *parameters; -- unsigned int parameter_count; - - struct - { -@@ -2536,13 +2535,10 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, uint64_t config_flags) - { -- const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; -- const struct shader_signature *output_signature = &program->output_signature; - const struct vkd3d_shader_interface_info *shader_interface; - const struct vkd3d_shader_descriptor_offset_info *offset_info; - const struct vkd3d_shader_spirv_target_info *target_info; - struct spirv_compiler *compiler; -- unsigned int max_element_count; - unsigned int i; - - if (!(compiler = vkd3d_malloc(sizeof(*compiler)))) -@@ -2570,13 +2566,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - compiler->spirv_target_info = target_info; - } - -- max_element_count = max(output_signature->element_count, patch_constant_signature->element_count); -- if (!(compiler->output_info = 
vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -- { -- vkd3d_free(compiler); -- return NULL; -- } -- - vkd3d_spirv_builder_init(&compiler->spirv_builder, spirv_compiler_get_entry_point_name(compiler)); - - compiler->formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT -@@ -3297,20 +3286,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); - } - --static const struct vkd3d_shader_parameter1 *spirv_compiler_get_shader_parameter( -- struct spirv_compiler *compiler, enum vkd3d_shader_parameter_name name) --{ -- unsigned int i; -- -- for (i = 0; i < compiler->parameter_count; ++i) -- { -- if (compiler->parameters[i].name == name) -- return &compiler->parameters[i]; -- } -- -- return NULL; --} -- - static const struct vkd3d_spec_constant_info - { - enum vkd3d_shader_parameter_name name; -@@ -3341,12 +3316,11 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com - { - if (!compiler->current_spec_constant_id) - { -- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - unsigned int i, id = 0; - -- for (i = 0; info && i < info->parameter_count; ++i) -+ for (i = 0; i < compiler->program->parameter_count; ++i) - { -- const struct vkd3d_shader_parameter *current = &info->parameters[i]; -+ const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i]; - - if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - id = max(current->u.specialization_constant.id + 1, id); -@@ -3404,7 +3378,7 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi - const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -- unsigned int index = parameter - compiler->parameters; -+ unsigned int index = parameter - compiler->program->parameters; - uint32_t type_id, ptr_id, ptr_type_id; - - type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); -@@ -3416,17 +3390,29 @@ static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compi - } - - static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name) -+ enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type) - { - const struct vkd3d_shader_parameter1 *parameter; -- enum vkd3d_data_type type = VKD3D_DATA_UINT; - -- if (!(parameter = spirv_compiler_get_shader_parameter(compiler, name))) -+ static const struct -+ { -+ enum vkd3d_data_type type; -+ } -+ type_map[] = -+ { -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT}, -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT}, -+ }; -+ -+ if (!(parameter = vsir_program_get_parameter(compiler->program, name))) - { - WARN("Unresolved shader parameter %#x.\n", name); - goto default_parameter; - } - -+ if (type_map[parameter->data_type].type != type) -+ ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type); -+ - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { - if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -@@ -3435,11 +3421,6 @@ static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *comp - return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); - } - -- if (parameter->data_type == 
VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -- type = VKD3D_DATA_FLOAT; -- else -- type = VKD3D_DATA_UINT; -- - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) -@@ -4225,7 +4206,7 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - else if (reg->type == VKD3DSPR_UNDEF) - return spirv_compiler_emit_load_undef(compiler, reg, write_mask); - else if (reg->type == VKD3DSPR_PARAMETER) -- return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset); -+ return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type); - - component_count = vsir_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -9566,7 +9547,7 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co - if (src->reg.type == VKD3DSPR_RASTERIZER) - { - val_id = spirv_compiler_emit_shader_parameter(compiler, -- VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); -+ VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT); - } - else - { -@@ -10597,12 +10578,16 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - struct vkd3d_shader_instruction_array instructions; - enum vkd3d_shader_spirv_environment environment; - enum vkd3d_result result = VKD3D_OK; -- unsigned int i; -+ unsigned int i, max_element_count; - - if ((result = vsir_program_normalise(program, compiler->config_flags, - compile_info, compiler->message_context)) < 0) - return result; - -+ max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); -+ if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ - if (program->temp_count) - spirv_compiler_emit_temps(compiler, program->temp_count); - if (program->ssa_count) -@@ -10610,12 +10595,10 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - - spirv_compiler_emit_descriptor_declarations(compiler); - -- compiler->parameter_count = program->parameter_count; -- compiler->parameters = program->parameters; -- compiler->spirv_parameter_info = vkd3d_calloc(compiler->parameter_count, sizeof(*compiler->spirv_parameter_info)); -- for (i = 0; i < compiler->parameter_count; ++i) -+ compiler->spirv_parameter_info = vkd3d_calloc(program->parameter_count, sizeof(*compiler->spirv_parameter_info)); -+ for (i = 0; i < program->parameter_count; ++i) - { -- const struct vkd3d_shader_parameter1 *parameter = &compiler->parameters[i]; -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; - - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) - { -@@ -10642,6 +10625,8 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -+ compiler->program = program; -+ - instructions = program->instructions; - memset(&program->instructions, 0, sizeof(program->instructions)); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index bf9759ebbbf..7aff22e3420 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ 
b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1382,6 +1382,8 @@ void vsir_program_cleanup(struct vsir_program *program); - int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); -+const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( -+ const struct vsir_program *program, enum vkd3d_shader_parameter_name name); - bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - const struct vkd3d_shader_version *version, unsigned int reserve); - enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index ff3e41e6b70..5fe381af90c 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -831,114 +831,90 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceFeatures2 features2; - }; - --static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) -+static void vkd3d_chain_physical_device_info_structures(struct vkd3d_physical_device_info *info, -+ struct d3d12_device *device) - { -- const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -- VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; -- VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; -- VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; -- VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; -- VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; -- VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; -- VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; -- VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; -- VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; -- VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features; -- VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *mutable_features; -- VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; -- VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; -- VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; -- VkPhysicalDevice physical_device = device->vk_physical_device; -- VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; -- VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -- VkPhysicalDeviceSubgroupProperties *subgroup_properties; - -- memset(info, 0, sizeof(*info)); -- conditional_rendering_features = &info->conditional_rendering_features; -- depth_clip_features = &info->depth_clip_features; -- descriptor_indexing_features = &info->descriptor_indexing_features; -- fragment_shader_interlock_features = &info->fragment_shader_interlock_features; -- robustness2_features = &info->robustness2_features; -- descriptor_indexing_properties = &info->descriptor_indexing_properties; -- maintenance3_properties = &info->maintenance3_properties; -- demote_features = &info->demote_features; -- buffer_alignment_features = &info->texel_buffer_alignment_features; -- buffer_alignment_properties = 
&info->texel_buffer_alignment_properties; -- vertex_divisor_features = &info->vertex_divisor_features; -- vertex_divisor_properties = &info->vertex_divisor_properties; -- timeline_semaphore_features = &info->timeline_semaphore_features; -- mutable_features = &info->mutable_features; -- formats4444_features = &info->formats4444_features; -- xfb_features = &info->xfb_features; -- xfb_properties = &info->xfb_properties; -- subgroup_properties = &info->subgroup_properties; -+ info->features2.pNext = NULL; - -- info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; -- -- conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; - if (vulkan_info->EXT_conditional_rendering) -- vk_prepend_struct(&info->features2, conditional_rendering_features); -- depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->conditional_rendering_features); - if (vulkan_info->EXT_depth_clip_enable) -- vk_prepend_struct(&info->features2, depth_clip_features); -- descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->depth_clip_features); - if (vulkan_info->EXT_descriptor_indexing) -- vk_prepend_struct(&info->features2, descriptor_indexing_features); -- fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->descriptor_indexing_features); - if (vulkan_info->EXT_fragment_shader_interlock) -- vk_prepend_struct(&info->features2, fragment_shader_interlock_features); -- robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->fragment_shader_interlock_features); - if (vulkan_info->EXT_robustness2) -- vk_prepend_struct(&info->features2, robustness2_features); -- demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->robustness2_features); - if (vulkan_info->EXT_shader_demote_to_helper_invocation) -- vk_prepend_struct(&info->features2, demote_features); -- buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->demote_features); - if (vulkan_info->EXT_texel_buffer_alignment) -- vk_prepend_struct(&info->features2, buffer_alignment_features); -- xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->texel_buffer_alignment_features); - if (vulkan_info->EXT_transform_feedback) -- vk_prepend_struct(&info->features2, xfb_features); -- vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->xfb_features); - if (vulkan_info->EXT_vertex_attribute_divisor) -- vk_prepend_struct(&info->features2, vertex_divisor_features); -- timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; -+ vk_prepend_struct(&info->features2, &info->vertex_divisor_features); - if (vulkan_info->KHR_timeline_semaphore) -- vk_prepend_struct(&info->features2, timeline_semaphore_features); -- mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, 
&info->timeline_semaphore_features); - if (vulkan_info->EXT_mutable_descriptor_type) -- vk_prepend_struct(&info->features2, mutable_features); -- formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->mutable_features); - if (vulkan_info->EXT_4444_formats) -- vk_prepend_struct(&info->features2, formats4444_features); -+ vk_prepend_struct(&info->features2, &info->formats4444_features); - -- if (vulkan_info->KHR_get_physical_device_properties2) -- VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -- else -- VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); -+ info->properties2.pNext = NULL; - -- info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; -- -- maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; - if (vulkan_info->KHR_maintenance3) -- vk_prepend_struct(&info->properties2, maintenance3_properties); -- descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->maintenance3_properties); - if (vulkan_info->EXT_descriptor_indexing) -- vk_prepend_struct(&info->properties2, descriptor_indexing_properties); -- buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->descriptor_indexing_properties); - if (vulkan_info->EXT_texel_buffer_alignment) -- vk_prepend_struct(&info->properties2, buffer_alignment_properties); -- xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->texel_buffer_alignment_properties); - if (vulkan_info->EXT_transform_feedback) -- vk_prepend_struct(&info->properties2, xfb_properties); -- vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->xfb_properties); - if (vulkan_info->EXT_vertex_attribute_divisor) -- vk_prepend_struct(&info->properties2, vertex_divisor_properties); -- subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; -+ vk_prepend_struct(&info->properties2, &info->vertex_divisor_properties); - if (d3d12_device_environment_is_vulkan_min_1_1(device)) -- vk_prepend_struct(&info->properties2, subgroup_properties); -+ vk_prepend_struct(&info->properties2, &info->subgroup_properties); -+} -+ -+static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) -+{ -+ const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -+ VkPhysicalDevice physical_device = device->vk_physical_device; -+ struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -+ -+ memset(info, 0, sizeof(*info)); -+ -+ info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; -+ info->conditional_rendering_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; -+ info->depth_clip_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; -+ info->descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; -+ info->fragment_shader_interlock_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ info->robustness2_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; -+ info->demote_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -+ info->texel_buffer_alignment_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; -+ info->xfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; -+ info->vertex_divisor_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; -+ info->timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; -+ info->mutable_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; -+ info->formats4444_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ -+ info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; -+ info->maintenance3_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; -+ info->descriptor_indexing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; -+ info->texel_buffer_alignment_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; -+ info->xfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; -+ info->vertex_divisor_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; -+ info->subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; -+ -+ vkd3d_chain_physical_device_info_structures(info, device); -+ -+ if (vulkan_info->KHR_get_physical_device_properties2) -+ VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -+ else -+ VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); -@@ -1840,6 +1816,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); - -+ vkd3d_chain_physical_device_info_structures(physical_device_info, device); -+ - return S_OK; - } - -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 7197193523d..bbfaaad47dd 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -645,7 +645,7 @@ static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_ro - return S_OK; - } - --static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, -+static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, - bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, - unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) -@@ -670,33 +670,38 @@ static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature * - } - - if (context->unbounded_offset != UINT_MAX) -- d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ -+ return S_OK; - } - --static uint32_t d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, 
-+static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, - unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, -- enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) -+ enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, -+ uint32_t *first_binding) - { -- uint32_t first_binding; - unsigned int i; -+ HRESULT hr; - - is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; - duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV - || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - && duplicate_descriptors; - -- first_binding = context->descriptor_binding; -+ *first_binding = context->descriptor_binding; - for (i = 0; i < binding_count; ++i) - { -- if (duplicate_descriptors) -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -- base_register_idx + i, true, shader_visibility, 1, context); -+ if (duplicate_descriptors -+ && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ register_space, base_register_idx + i, true, shader_visibility, 1, context))) -+ return hr; - -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context); -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -+ base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) -+ return hr; - } -- return first_binding; -+ return S_OK; - } - - static uint32_t vkd3d_descriptor_magic_from_d3d12(D3D12_DESCRIPTOR_RANGE_TYPE type) -@@ -764,6 +769,7 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r - enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); - bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; - enum vkd3d_shader_descriptor_type descriptor_type = range->type; -+ HRESULT hr; - - if (range->descriptor_count == UINT_MAX) - context->unbounded_offset = range->offset; -@@ -775,8 +781,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r - return E_NOTIMPL; - ++context->current_binding; - -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context); -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -+ range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) -+ return hr; - } - - if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, -@@ -784,8 +791,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r - return E_NOTIMPL; - ++context->current_binding; - -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context); -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -+ range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) -+ return hr; - - 
context->unbounded_offset = UINT_MAX; - -@@ -1130,9 +1138,10 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - - cur_binding = context->current_binding; - -- vk_binding = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, -- shader_visibility, context); -+ shader_visibility, context, &vk_binding))) -+ return hr; - - /* Unroll descriptor range. */ - for (k = 0; k < range->descriptor_count; ++k) -@@ -1175,6 +1184,7 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign - { - VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; - unsigned int i; -+ HRESULT hr; - - root_signature->push_descriptor_mask = 0; - -@@ -1188,10 +1198,11 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign - - root_signature->push_descriptor_mask |= 1u << i; - -- cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), - p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, -- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context); -+ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) -+ return hr; - cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); - cur_binding->descriptorCount = 1; - cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); -@@ -1223,9 +1234,10 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) - return hr; - -- cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, -- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context); -+ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) -+ return hr; - cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - cur_binding->descriptorCount = 1; - cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index 7acd39d65be..cae8aa69c8b 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -1756,7 +1756,6 @@ static inline void vk_prepend_struct(void *header, void *structure) - { - VkBaseOutStructure *vk_header = header, *vk_structure = structure; - -- assert(!vk_structure->pNext); - vk_structure->pNext = vk_header->pNext; - vk_header->pNext = vk_structure; - } --- -2.43.0 - diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch deleted file mode 100644 index 0bcc0714..00000000 --- a/patches/vkd3d-latest/0006-Updated-vkd3d-to-b23874dad600ec777c0bbe6ecc7aa3f5020.patch +++ /dev/null @@ -1,7977 +0,0 @@ -From 0ecf5a7422b7001bee116ec98919724ffb0ec8b3 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Fri, 9 Aug 2024 
07:52:46 +1000 -Subject: [PATCH] Updated vkd3d to b23874dad600ec777c0bbe6ecc7aa3f5020476d1. - ---- - libs/vkd3d/include/private/vkd3d_common.h | 70 ++- - libs/vkd3d/libs/vkd3d-common/debug.c | 13 +- - libs/vkd3d/libs/vkd3d-shader/checksum.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 82 ++- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 6 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 46 +- - libs/vkd3d/libs/vkd3d-shader/fx.c | 486 ++++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 196 ++++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 225 ++++++-- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 55 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 127 ++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 125 ++++- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 384 +++++++++++--- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 110 ++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 108 ++-- - libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 4 +- - libs/vkd3d/libs/vkd3d-shader/preproc.y | 2 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 180 ++++--- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 152 +++--- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 12 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 10 +- - libs/vkd3d/libs/vkd3d/cache.c | 9 +- - libs/vkd3d/libs/vkd3d/command.c | 396 +++++++------- - libs/vkd3d/libs/vkd3d/device.c | 24 +- - libs/vkd3d/libs/vkd3d/resource.c | 42 +- - libs/vkd3d/libs/vkd3d/state.c | 187 +++++-- - libs/vkd3d/libs/vkd3d/utils.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 8 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 8 +- - 31 files changed, 2151 insertions(+), 926 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index e7b25602ec0..c62dc00415f 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -46,6 +46,12 @@ - - #define STATIC_ASSERT(e) extern void __VKD3D_STATIC_ASSERT__(int [(e) ? 1 : -1]) - -+#define VKD3D_ASSERT(cond) \ -+ do { \ -+ if (!(cond)) \ -+ ERR("Failed assertion: %s\n", #cond); \ -+ } while (0) -+ - #define MEMBER_SIZE(t, m) sizeof(((t *)0)->m) - - #define VKD3D_MAKE_TAG(ch0, ch1, ch2, ch3) \ -@@ -102,17 +108,11 @@ static inline uint64_t align(uint64_t addr, size_t alignment) - # define VKD3D_UNREACHABLE (void)0 - #endif /* __GNUC__ */ - --VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsigned int line) --{ -- fprintf(stderr, "%s:%u: Aborting, reached unreachable code.\n", filename, line); -- abort(); --} -- --#ifdef NDEBUG --#define vkd3d_unreachable() VKD3D_UNREACHABLE --#else --#define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) --#endif -+#define vkd3d_unreachable() \ -+ do { \ -+ ERR("%s:%u: Unreachable code reached.\n", __FILE__, __LINE__); \ -+ VKD3D_UNREACHABLE; \ -+ } while (0) - - #ifdef VKD3D_NO_TRACE_MESSAGES - #define TRACE(args...) do { } while (0) -@@ -122,11 +122,19 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig - #ifdef VKD3D_NO_DEBUG_MESSAGES - #define WARN(args...) do { } while (0) - #define FIXME(args...) do { } while (0) -+#define WARN_ON() (false) -+#define FIXME_ONCE(args...) do { } while (0) -+#endif -+ -+#ifdef VKD3D_NO_ERROR_MESSAGES -+#define ERR(args...) do { } while (0) -+#define MESSAGE(args...) 
do { } while (0) - #endif - - enum vkd3d_dbg_level - { - VKD3D_DBG_LEVEL_NONE, -+ VKD3D_DBG_LEVEL_MESSAGE, - VKD3D_DBG_LEVEL_ERR, - VKD3D_DBG_LEVEL_FIXME, - VKD3D_DBG_LEVEL_WARN, -@@ -147,7 +155,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - #define VKD3D_DBG_LOG(level) \ - do { \ - const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ -- VKD3D_DBG_PRINTF -+ VKD3D_DBG_PRINTF_##level - - #define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ - do { \ -@@ -155,24 +163,50 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ - ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ - vkd3d_dbg_next_time = true; \ -- VKD3D_DBG_PRINTF -+ VKD3D_DBG_PRINTF_##level - - #define VKD3D_DBG_PRINTF(...) \ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) - -+#define VKD3D_DBG_PRINTF_TRACE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#define VKD3D_DBG_PRINTF_WARN(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#define VKD3D_DBG_PRINTF_FIXME(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#define VKD3D_DBG_PRINTF_MESSAGE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+ -+#ifdef VKD3D_ABORT_ON_ERR -+#define VKD3D_DBG_PRINTF_ERR(...) \ -+ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ -+ abort(); \ -+ } while (0) -+#else -+#define VKD3D_DBG_PRINTF_ERR(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#endif -+ -+/* Used by vkd3d_unreachable(). */ -+#ifdef VKD3D_CROSSTEST -+#undef ERR -+#define ERR(...) do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) -+#endif -+ - #ifndef TRACE --#define TRACE VKD3D_DBG_LOG(TRACE) -+#define TRACE VKD3D_DBG_LOG(TRACE) - #endif - - #ifndef WARN --#define WARN VKD3D_DBG_LOG(WARN) -+#define WARN VKD3D_DBG_LOG(WARN) - #endif - - #ifndef FIXME --#define FIXME VKD3D_DBG_LOG(FIXME) -+#define FIXME VKD3D_DBG_LOG(FIXME) - #endif - --#define ERR VKD3D_DBG_LOG(ERR) -+#ifndef ERR -+#define ERR VKD3D_DBG_LOG(ERR) -+#endif -+ -+#ifndef MESSAGE -+#define MESSAGE VKD3D_DBG_LOG(MESSAGE) -+#endif - - #ifndef TRACE_ON - #define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) -@@ -182,7 +216,9 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - #define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) - #endif - -+#ifndef FIXME_ONCE - #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) -+#endif - - #define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name - -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index 4523fc997ef..4bfc19bd9a1 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -45,11 +45,12 @@ extern const char *const vkd3d_dbg_env_name; - - static const char *const debug_level_names[] = - { -- [VKD3D_DBG_LEVEL_NONE ] = "none", -- [VKD3D_DBG_LEVEL_ERR ] = "err", -- [VKD3D_DBG_LEVEL_FIXME] = "fixme", -- [VKD3D_DBG_LEVEL_WARN ] = "warn", -- [VKD3D_DBG_LEVEL_TRACE] = "trace", -+ [VKD3D_DBG_LEVEL_NONE ] = "none", -+ [VKD3D_DBG_LEVEL_MESSAGE] = "message", -+ [VKD3D_DBG_LEVEL_ERR ] = "err", -+ [VKD3D_DBG_LEVEL_FIXME] = "fixme", -+ [VKD3D_DBG_LEVEL_WARN ] = "warn", -+ [VKD3D_DBG_LEVEL_TRACE] = "trace", - }; - - enum vkd3d_dbg_level vkd3d_dbg_get_level(void) -@@ -104,8 +105,6 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch - if (vkd3d_dbg_get_level() < level) - return; - -- assert(level < ARRAY_SIZE(debug_level_names)); -- - #ifdef _WIN32 - vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", 
GetCurrentThreadId(), debug_level_names[level], function); - #elif HAVE_GETTID -diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c -index 0910729a0e9..d9560628c77 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/checksum.c -+++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c -@@ -288,7 +288,7 @@ void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksu - const uint8_t *ptr = dxbc; - struct md5_ctx ctx; - -- assert(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); -+ VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); - ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; - size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 8a3eb5a367a..2c2f0c43ece 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -1919,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, - } - else - { -- assert(icb->component_count == VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(icb->component_count == VKD3D_VEC4_SIZE); - for (i = 0; i < icb->element_count; ++i) - { - shader_print_hex_literal(compiler, " {", icb->data[4 * i + 0], ""); -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index 492ad9b69fb..a4c038a233a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -757,7 +757,7 @@ static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, - { - /* d3d shaders have a maximum of 8192 constants; we should not overrun - * this array. */ -- assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); -+ VKD3D_ASSERT((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); - bitmap_set(sm1->constants[set].def_mask, index); - } - } -@@ -1492,7 +1492,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else -@@ -1519,6 +1519,10 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - break; - } - -@@ -1618,6 +1622,10 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - break; - } - -@@ -1910,7 +1918,7 @@ static bool is_inconsequential_instr(const struct sm1_instruction *instr) - - static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) - { -- assert(reg->writemask); -+ VKD3D_ASSERT(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); - } - -@@ -2090,7 +2098,7 @@ static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ - struct hlsl_ctx *ctx = d3dbc->ctx; - - /* Narrowing casts were already lowered. 
*/ -- assert(src_type->dimx == dst_type->dimx); -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); - - switch (dst_type->e.numeric.type) - { -@@ -2204,7 +2212,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, - else - { - ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); -- assert(ret); -+ VKD3D_ASSERT(ret); - reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; - reg.reg = element->register_index; - } -@@ -2348,8 +2356,8 @@ static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl - .src_count = 1, - }; - -- assert(instr->reg.allocated); -- assert(constant->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); -+ VKD3D_ASSERT(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); - } -@@ -2374,12 +2382,40 @@ static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, - const struct hlsl_reg *dst, const struct hlsl_reg *src) - { -+ struct sm1_instruction instr = -+ { -+ .opcode = D3DSIO_SINCOS, -+ -+ .dst.type = VKD3DSPR_TEMP, -+ .dst.writemask = dst->writemask, -+ .dst.reg = dst->id, -+ .has_dst = 1, -+ -+ .srcs[0].type = VKD3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), -+ .srcs[0].reg = src->id, -+ .src_count = 1, -+ }; -+ - if (op == HLSL_OP1_COS_REDUCED) -- assert(dst->writemask == VKD3DSP_WRITEMASK_0); -+ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); - else /* HLSL_OP1_SIN_REDUCED */ -- assert(dst->writemask == VKD3DSP_WRITEMASK_1); -+ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); -+ -+ if (d3dbc->ctx->profile->major_version < 3) -+ { -+ instr.src_count = 3; -+ -+ instr.srcs[1].type = VKD3DSPR_CONST; -+ instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); -+ instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; -+ -+ instr.srcs[2].type = VKD3DSPR_CONST; -+ instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); -+ instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; -+ } - -- d3dbc_write_unary_op(d3dbc, D3DSIO_SINCOS, dst, src, 0, 0); -+ d3dbc_write_instruction(d3dbc, &instr); - } - - static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -@@ -2391,7 +2427,7 @@ static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ - struct hlsl_ir_node *arg3 = expr->operands[2].node; - struct hlsl_ctx *ctx = d3dbc->ctx; - -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - - if (expr->op == HLSL_OP1_REINTERPRET) - { -@@ -2534,7 +2570,7 @@ static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_no - struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; - - condition = iff->condition.node; -- assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -+ VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); - - sm1_ifc = (struct sm1_instruction) - { -@@ -2616,11 +2652,11 @@ static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ - .src_count = 1, - }; - -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - - if (load->src.var->is_uniform) - { -- assert(reg.allocated); -+ VKD3D_ASSERT(reg.allocated); - sm1_instr.srcs[0].type = VKD3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) -@@ -2628,7 +2664,7 @@ static void 
d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ - if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, - load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { -- assert(reg.allocated); -+ VKD3D_ASSERT(reg.allocated); - sm1_instr.srcs[0].type = VKD3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } -@@ -2706,7 +2742,7 @@ static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct - return; - } - -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - - d3dbc_write_instruction(d3dbc, &sm1_instr); - } -@@ -2733,12 +2769,6 @@ static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir - .src_count = 1, - }; - -- if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) -- { -- hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); -- return; -- } -- - if (store->lhs.var->is_output_semantic) - { - if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) -@@ -2749,7 +2779,7 @@ static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir - else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, - store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { -- assert(reg.allocated); -+ VKD3D_ASSERT(reg.allocated); - sm1_instr.dst.type = VKD3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } -@@ -2757,7 +2787,7 @@ static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; - } - else -- assert(reg.allocated); -+ VKD3D_ASSERT(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); -@@ -2783,8 +2813,8 @@ static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ - .src_count = 1, - }; - -- assert(instr->reg.allocated); -- assert(val->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); -+ VKD3D_ASSERT(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - d3dbc_write_instruction(d3dbc, &sm1_instr); - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 4b9f67235aa..184788dc57e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -29,7 +29,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void - { - struct vkd3d_shader_dxbc_section_desc *section; - -- assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); -+ VKD3D_ASSERT(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - - section = &dxbc->sections[dxbc->section_count++]; - section->tag = tag; -@@ -983,7 +983,7 @@ static int shader_parse_root_signature(const struct vkd3d_shader_code *data, - { - struct vkd3d_shader_root_signature_desc1 *v_1_1 = &desc->u.v_1_1; - -- assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); -+ VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); - - v_1_1->parameter_count = count; - if (v_1_1->parameter_count) -@@ -1777,7 +1777,7 @@ int vkd3d_shader_convert_root_signature(struct vkd3d_shader_versioned_root_signa - } - else - { -- assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); -+ VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); - ret = convert_root_signature_to_v1_1(dst, src); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c 
b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index bf581928a9e..2a0bbe1a625 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -941,7 +941,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length - if (!length) - return 0; - -- assert(length < 32); -+ VKD3D_ASSERT(length < 32); - - if (sm6_parser_is_end(sm6)) - { -@@ -949,7 +949,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length - return 0; - } - -- assert(sm6->bitpos < 32); -+ VKD3D_ASSERT(sm6->bitpos < 32); - bits = *sm6->ptr >> sm6->bitpos; - l = 32 - sm6->bitpos; - if (l <= length) -@@ -1208,7 +1208,7 @@ static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) - struct dxil_global_abbrev *global_abbrev; - enum vkd3d_result ret; - -- assert(block->id == BLOCKINFO_BLOCK); -+ VKD3D_ASSERT(block->id == BLOCKINFO_BLOCK); - - if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) - || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) -@@ -1477,7 +1477,7 @@ static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct - if (sm6->abbrevs[i]->block_id == block->id) - block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; - -- assert(abbrev_count == block->abbrev_count); -+ VKD3D_ASSERT(abbrev_count == block->abbrev_count); - } - - if ((ret = dxil_block_read(block, sm6)) < 0) -@@ -1555,7 +1555,7 @@ static char *dxil_record_to_string(const struct dxil_record *record, unsigned in - unsigned int i; - char *str; - -- assert(offset <= record->operand_count); -+ VKD3D_ASSERT(offset <= record->operand_count); - if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -@@ -1843,7 +1843,7 @@ static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) - ++sm6->type_count; - } - -- assert(sm6->type_count == type_count); -+ VKD3D_ASSERT(sm6->type_count == type_count); - - if (struct_name) - { -@@ -2216,13 +2216,13 @@ static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) - - static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) - { -- assert(sm6_value_is_function_dcl(fn)); -+ VKD3D_ASSERT(sm6_value_is_function_dcl(fn)); - return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); - } - - static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) - { -- assert(sm6->value_count < sm6->value_capacity); -+ VKD3D_ASSERT(sm6->value_count < sm6->value_capacity); - return &sm6->values[sm6->value_count]; - } - -@@ -3404,7 +3404,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa - enum vkd3d_shader_opcode handler_idx) - { - struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); -- assert(ins); -+ VKD3D_ASSERT(ins); - vsir_instruction_init(ins, &sm6->p.location, handler_idx); - ++sm6->p.program->instructions.count; - return ins; -@@ -3651,7 +3651,7 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init - { - const struct sm6_value *value; - -- assert(index); -+ VKD3D_ASSERT(index); - --index; - if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_icb(value) && !sm6_value_is_undef(value))) - { -@@ -3895,7 +3895,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - if 
(e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) - param->reg.idx[count++].offset = 0; - -- assert(count < ARRAY_SIZE(param->reg.idx)); -+ VKD3D_ASSERT(count < ARRAY_SIZE(param->reg.idx)); - param->reg.idx[count++].offset = i; - param->reg.idx_count = count; - } -@@ -4866,10 +4866,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr - return; - src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); - register_index_address_init(&src_param->reg.idx[2], operands[1], sm6); -- assert(src_param->reg.idx_count == 3); -+ VKD3D_ASSERT(src_param->reg.idx_count == 3); - - type = sm6_type_get_scalar_type(dst->type, 0); -- assert(type); -+ VKD3D_ASSERT(type); - src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type); - if (data_type_is_64_bit(src_param->reg.data_type)) - src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); -@@ -5342,7 +5342,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - - if (!is_patch_constant && !operands[3]->is_undefined) - { -- assert(src_param->reg.idx_count > count); -+ VKD3D_ASSERT(src_param->reg.idx_count > count); - register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); - } - -@@ -6400,7 +6400,7 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ - - info = &sm6_dx_op_table[op]; - -- assert(info->ret_type[0]); -+ VKD3D_ASSERT(info->ret_type[0]); - if (!sm6_parser_validate_operand_type(sm6, dst, info->ret_type[0], NULL, true)) - { - WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); -@@ -6605,7 +6605,7 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ - else if (to->u.width > from->u.width) - { - op = (code == CAST_ZEXT) ? 
VKD3DSIH_UTOU : VKD3DSIH_ITOI; -- assert(from->u.width == 1 || to->u.width == 64); -+ VKD3D_ASSERT(from->u.width == 1 || to->u.width == 64); - is_valid = from_int && to_int; - } - break; -@@ -7141,7 +7141,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - - if (ptr->structure_stride) - { -- assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); - - if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -@@ -7324,7 +7324,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - - if (ptr->structure_stride) - { -- assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); - - if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -@@ -7380,7 +7380,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec - if (!(src = sm6_parser_get_value_by_ref(sm6, record, type, &i)) - || !sm6_value_validate_is_register(src, sm6)) - return; -- assert(i == 2); -+ VKD3D_ASSERT(i == 2); - - if (src->type != type) - { -@@ -7690,7 +7690,7 @@ static void metadata_attachment_record_apply(const struct dxil_record *record, e - "Ignoring a nested metadata attachment."); - } - -- assert(record->operand_count & 1); -+ VKD3D_ASSERT(record->operand_count & 1); - for (i = 1; i < record->operand_count; i += 2) - { - if (!(m = sm6_parser_find_metadata_kind(sm6, record->operands[i]))) -@@ -8053,7 +8053,7 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ - switch_case = &block->terminator.cases[i]; - if (!(case_block = switch_case->block)) - { -- assert(sm6->p.failed); -+ VKD3D_ASSERT(sm6->p.failed); - continue; - } - if (switch_case->is_default) -@@ -8122,7 +8122,7 @@ static void sm6_block_emit_phi(const struct sm6_block *block, struct sm6_parser - if (incoming_block) - vsir_src_param_init_label(&src_params[index + 1], incoming_block->id); - else -- assert(sm6->p.failed); -+ VKD3D_ASSERT(sm6->p.failed); - } - - dst_param_init(dst_param); -@@ -10541,7 +10541,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - uint32_t *byte_code = NULL; - int ret; - -- ERR("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ MESSAGE("Creating a DXIL parser. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); - - dxbc_desc.is_dxil = true; - if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 15a518c07db..e3ebbafb3f4 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -56,36 +56,80 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) - vkd3d_free(string_entry); - } - --struct state_block_function_info -+struct function_component -+{ -+ const char *name; -+ bool lhs_has_index; -+ unsigned int lhs_index; -+}; -+ -+static const struct state_block_function_info - { - const char *name; - unsigned int min_args, max_args; -+ const struct function_component components[3]; -+ unsigned int min_profile; -+} -+function_info[] = -+{ -+ {"SetBlendState", 3, 3, { { "AB_BlendFactor" }, { "AB_SampleMask" }, { "BlendState" } }, 4 }, -+ {"SetDepthStencilState", 2, 2, { { "DS_StencilRef" }, { "DepthStencilState" } }, 4 }, -+ {"SetRasterizerState", 1, 1, { { "RasterizerState" } }, 4 }, -+ {"SetVertexShader", 1, 1, { { "VertexShader" } }, 4 }, -+ {"SetDomainShader", 1, 1, { { "DomainShader" } }, 5 }, -+ {"SetHullShader", 1, 1, { { "HullShader" } }, 5 }, -+ {"SetGeometryShader", 1, 1, { { "GeometryShader" } }, 4 }, -+ {"SetPixelShader", 1, 1, { { "PixelShader" } }, 4 }, -+ {"SetComputeShader", 1, 1, { { "ComputeShader" } }, 4 }, -+ {"OMSetRenderTargets", 2, 9, { {0} }, 4 }, - }; - - static const struct state_block_function_info *get_state_block_function_info(const char *name) - { -- static const struct state_block_function_info valid_functions[] = -- { -- {"SetBlendState", 3, 3}, -- {"SetDepthStencilState", 2, 2}, -- {"SetRasterizerState", 1, 1}, -- {"SetVertexShader", 1, 1}, -- {"SetDomainShader", 1, 1}, -- {"SetHullShader", 1, 1}, -- {"SetGeometryShader", 1, 1}, -- {"SetPixelShader", 1, 1}, -- {"SetComputeShader", 1, 1}, -- {"OMSetRenderTargets", 2, 9}, -- }; -- -- for (unsigned int i = 0; i < ARRAY_SIZE(valid_functions); ++i) -+ for (unsigned int i = 0; i < ARRAY_SIZE(function_info); ++i) - { -- if (!strcmp(name, valid_functions[i].name)) -- return &valid_functions[i]; -+ if (!strcmp(name, function_info[i].name)) -+ return &function_info[i]; - } - return NULL; - } - -+static void add_function_component(struct function_component **components, const char *name, -+ bool lhs_has_index, unsigned int lhs_index) -+{ -+ struct function_component *comp = *components; -+ -+ comp->name = name; -+ comp->lhs_has_index = lhs_has_index; -+ comp->lhs_index = lhs_index; -+ -+ *components = *components + 1; -+} -+ -+static void get_state_block_function_components(const struct state_block_function_info *info, -+ struct function_component *components, unsigned int comp_count) -+{ -+ unsigned int i; -+ -+ assert(comp_count <= info->max_args); -+ -+ if (info->min_args == info->max_args) -+ { -+ const struct function_component *c = info->components; -+ for (i = 0; i < comp_count; ++i, ++c) -+ add_function_component(&components, c->name, c->lhs_has_index, c->lhs_index); -+ return; -+ } -+ -+ if (!strcmp(info->name, "OMSetRenderTargets")) -+ { -+ for (i = 0; i < comp_count - 2; ++i) -+ add_function_component(&components, "RenderTargetView", true, i + 1); -+ add_function_component(&components, "DepthStencilView", false, 0); -+ add_function_component(&components, "RenderTargetView", true, 0); -+ } -+} -+ - bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, 
struct hlsl_state_block_entry *entry, - const struct vkd3d_shader_location *loc) - { -@@ -189,9 +233,6 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) - - static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -- if (var->state_block_count && var->state_blocks[0]->count) -- hlsl_fixme(fx->ctx, &var->loc, "Write pass assignments."); -- - fx->ops->write_pass(var, fx); - } - -@@ -230,6 +271,8 @@ static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_con - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); - static const char * get_fx_4_type_name(const struct hlsl_type *type); - static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); -+static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, -+ uint32_t count_offset, struct fx_write_context *fx); - - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -@@ -238,7 +281,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - unsigned int elements_count; - const char *name; - -- assert(fx->ctx->profile->major_version >= 4); -+ VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); - - if (type->class == HLSL_CLASS_ARRAY) - { -@@ -374,15 +417,14 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f - static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t name_offset; -+ uint32_t name_offset, count_offset; - - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); -- put_u32(buffer, 0); /* Assignment count. */ -+ count_offset = put_u32(buffer, 0); - - write_fx_4_annotations(var->annotations, fx); -- -- /* TODO: assignments */ -+ write_fx_4_state_block(var, 0, count_offset, fx); - } - - static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -398,6 +440,9 @@ static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx - /* TODO: annotations */ - /* TODO: assignments */ - -+ if (var->state_block_count && var->state_blocks[0]->count) -+ hlsl_fixme(fx->ctx, &var->loc, "Write pass assignments."); -+ - /* For some reason every pass adds to the total shader object count. 
*/ - fx->shader_count++; - } -@@ -587,6 +632,10 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - put_u32_unaligned(buffer, 2); - break; - -@@ -710,6 +759,18 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - numeric_desc = get_fx_4_numeric_type_description(type, fx); - put_u32_unaligned(buffer, numeric_desc); - } -+ else if (type->class == HLSL_CLASS_COMPUTE_SHADER) -+ { -+ put_u32_unaligned(buffer, 28); -+ } -+ else if (type->class == HLSL_CLASS_HULL_SHADER) -+ { -+ put_u32_unaligned(buffer, 29); -+ } -+ else if (type->class == HLSL_CLASS_DOMAIN_SHADER) -+ { -+ put_u32_unaligned(buffer, 30); -+ } - else - { - FIXME("Type %u is not supported.\n", type->class); -@@ -870,7 +931,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - - /* Validated in check_invalid_object_fields(). */ -- assert(hlsl_is_numeric_type(field->type)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); - write_fx_2_parameter(field->type, field->name, &field->semantic, fx); - } - } -@@ -1013,6 +1074,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - return false; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -1330,11 +1395,8 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_node *value = entry->args->node; - -- if (entry->lhs_has_index) -- hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); -- - put_u32(buffer, entry->name_id); -- put_u32(buffer, 0); /* TODO: destination index */ -+ put_u32(buffer, entry->lhs_index); - type_offset = put_u32(buffer, 0); - rhs_offset = put_u32(buffer, 0); - -@@ -1348,6 +1410,17 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - assignment_type = 1; - break; - } -+ case HLSL_IR_LOAD: -+ { -+ struct hlsl_ir_load *l = hlsl_ir_load(value); -+ -+ if (l->src.path_len) -+ hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); -+ -+ value_offset = write_fx_4_string(l->src.var->name, fx); -+ assignment_type = 2; -+ break; -+ } - default: - hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); - } -@@ -1424,8 +1497,59 @@ enum state_property_component_type - FX_FLOAT, - FX_UINT, - FX_UINT8, -+ FX_DEPTHSTENCIL, -+ FX_RASTERIZER, -+ FX_DOMAINSHADER, -+ FX_HULLSHADER, -+ FX_COMPUTESHADER, -+ FX_TEXTURE, -+ FX_DEPTHSTENCILVIEW, -+ FX_RENDERTARGETVIEW, - }; - -+static inline bool is_object_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case FX_DEPTHSTENCIL: -+ case FX_RASTERIZER: -+ case FX_DOMAINSHADER: -+ case FX_HULLSHADER: -+ case FX_COMPUTESHADER: -+ case FX_TEXTURE: -+ case FX_RENDERTARGETVIEW: -+ case FX_DEPTHSTENCILVIEW: -+ return true; -+ default: -+ return false; -+ } -+} -+ -+static inline enum hlsl_type_class hlsl_type_class_from_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case 
FX_DEPTHSTENCIL: -+ return HLSL_CLASS_DEPTH_STENCIL_STATE; -+ case FX_RASTERIZER: -+ return HLSL_CLASS_RASTERIZER_STATE; -+ case FX_DOMAINSHADER: -+ return HLSL_CLASS_DOMAIN_SHADER; -+ case FX_HULLSHADER: -+ return HLSL_CLASS_HULL_SHADER; -+ case FX_COMPUTESHADER: -+ return HLSL_CLASS_COMPUTE_SHADER; -+ case FX_TEXTURE: -+ return HLSL_CLASS_TEXTURE; -+ case FX_RENDERTARGETVIEW: -+ return HLSL_CLASS_RENDER_TARGET_VIEW; -+ case FX_DEPTHSTENCILVIEW: -+ return HLSL_CLASS_DEPTH_STENCIL_VIEW; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ - static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) - { - switch (type) -@@ -1546,48 +1670,61 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - enum hlsl_type_class class; - enum state_property_component_type type; - unsigned int dimx; -+ unsigned int array_size; - uint32_t id; - const struct rhs_named_value *values; - } - states[] = - { -- { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 12, fill_values }, -- { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 13, cull_values }, -- { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 14 }, -- { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 15 }, -- { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 16 }, -- { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 17 }, -- { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 18 }, -- { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 19 }, -- { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 20 }, -- { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 21 }, -- -- { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 22 }, -- { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 23, depth_write_mask_values }, -- { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 24, comparison_values }, -- { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 25 }, -- { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 26 }, -- { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 27 }, -- { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 28, stencil_op_values }, -- { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 29, stencil_op_values }, -- { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 30, stencil_op_values }, -- { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 31, comparison_values }, -- { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 32, stencil_op_values }, -- { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 33, stencil_op_values }, -- { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 34, stencil_op_values }, -- { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 35, comparison_values }, -- -- { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 45, filter_values }, -- { "AddressU", HLSL_CLASS_SAMPLER, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 46, address_values }, -- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 47, address_values }, -- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 48, address_values }, -- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 49 }, -- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 50 }, -- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 51, compare_func_values }, -- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 52 }, -- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 53 }, -- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 54 }, -- /* TODO: "Texture" field */ -+ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, -+ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, -+ -+ { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, -+ { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, -+ -+ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, -+ -+ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, -+ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, -+ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14 }, -+ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, -+ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, -+ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, -+ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18 }, -+ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19 }, -+ { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20 }, -+ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21 }, -+ -+ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22 }, -+ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, -+ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, -+ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25 }, -+ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, -+ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, -+ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, -+ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, -+ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, -+ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, -+ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, -+ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, -+ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, 
HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, -+ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, -+ -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, -+ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55 }, -+ -+ { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, -+ { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, -+ { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, - }; - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - struct replace_state_context replace_context; -@@ -1596,6 +1733,7 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - const struct state *state = NULL; - struct hlsl_ctx *ctx = fx->ctx; - enum hlsl_base_type base_type; -+ struct hlsl_ir_load *load; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(states); ++i) -@@ -1621,6 +1759,27 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - return; - } - -+ if (entry->lhs_has_index && state->array_size == 1) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Can't use array-style access for non-array state %s.", -+ entry->name); -+ return; -+ } -+ -+ if (!entry->lhs_has_index && state->array_size > 1) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected array index for array state %s.", -+ entry->name); -+ return; -+ } -+ -+ if (entry->lhs_has_index && (state->array_size <= entry->lhs_index)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid element index %u for the state %s[%u].", -+ entry->lhs_index, state->name, state->array_size); -+ return; -+ } -+ - entry->name_id = state->id; - - replace_context.values = state->values; -@@ -1632,6 +1791,32 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - - /* Now cast and run folding again. 
*/ - -+ if (is_object_fx_type(state->type)) -+ { -+ node = entry->args->node; -+ -+ switch (node->type) -+ { -+ case HLSL_IR_LOAD: -+ load = hlsl_ir_load(node); -+ -+ if (load->src.path_len) -+ hlsl_fixme(ctx, &ctx->location, "Arrays are not supported for RHS."); -+ -+ if (load->src.var->data_type->class != hlsl_type_class_from_fx_type(state->type)) -+ { -+ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Type mismatch for the %s state value", -+ entry->name); -+ } -+ -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Unhandled node type for object-typed field."); -+ } -+ -+ return; -+ } -+ - base_type = hlsl_type_from_fx_type(state->type); - switch (state->class) - { -@@ -1676,40 +1861,138 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - } - } - --static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct hlsl_state_block *block, -+ unsigned int entry_index, struct fx_write_context *fx) - { -- uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; -- struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t count_offset, count; -+ struct hlsl_state_block_entry *entry = block->entries[entry_index]; -+ const struct state_block_function_info *info; -+ struct function_component components[9]; -+ struct hlsl_ctx *ctx = fx->ctx; -+ unsigned int i; - -- for (i = 0; i < elements_count; ++i) -+ if (!entry->is_function_call) -+ return 1; -+ -+ if (!(info = get_state_block_function_info(entry->name))) -+ return 1; -+ -+ if (info->min_profile > ctx->profile->major_version) - { -- struct hlsl_state_block *block; -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "State %s is not supported for this profile.", entry->name); -+ return 1; -+ } - -- count_offset = put_u32(buffer, 0); -+ /* For single argument case simply replace the name. 
*/ -+ if (info->min_args == info->max_args && info->min_args == 1) -+ { -+ vkd3d_free(entry->name); -+ entry->name = hlsl_strdup(ctx, info->components[0].name); -+ return 1; -+ } -+ -+ if (!vkd3d_array_reserve((void **)&block->entries, &block->capacity, block->count + entry->args_count - 1, -+ sizeof(*block->entries))) -+ return 1; -+ if (entry_index != block->count - 1) -+ { -+ memmove(&block->entries[entry_index + entry->args_count], &block->entries[entry_index + 1], -+ (block->count - entry_index - 1) * sizeof(*block->entries)); -+ } -+ block->count += entry->args_count - 1; -+ -+ get_state_block_function_components(info, components, entry->args_count); -+ -+ for (i = 0; i < entry->args_count; ++i) -+ { -+ const struct function_component *comp = &components[i]; -+ unsigned int arg_index = (i + 1) % entry->args_count; -+ block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, -+ comp->lhs_has_index, comp->lhs_index, arg_index); -+ } -+ hlsl_free_state_block_entry(entry); -+ -+ return entry->args_count; -+} -+ -+static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, -+ uint32_t count_offset, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ struct hlsl_state_block *block; -+ uint32_t i, count = 0; -+ -+ if (var->state_blocks) -+ { -+ block = var->state_blocks[block_index]; - -- count = 0; -- if (var->state_blocks) -+ for (i = 0; i < block->count;) - { -- block = var->state_blocks[i]; -+ i += decompose_fx_4_state_block(var, block, i, fx); -+ } - -- for (j = 0; j < block->count; ++j) -- { -- struct hlsl_state_block_entry *entry = block->entries[j]; -+ for (i = 0; i < block->count; ++i) -+ { -+ struct hlsl_state_block_entry *entry = block->entries[i]; - -- /* Skip if property is reassigned later. This will use the last assignment. */ -- if (state_block_contains_state(entry->name, j + 1, block)) -- continue; -+ /* Skip if property is reassigned later. This will use the last assignment. */ -+ if (state_block_contains_state(entry->name, i + 1, block)) -+ continue; - -- /* Resolve special constant names and property names. */ -- resolve_fx_4_state_block_values(var, entry, fx); -+ /* Resolve special constant names and property names. */ -+ resolve_fx_4_state_block_values(var, entry, fx); - -- write_fx_4_state_assignment(var, entry, fx); -- ++count; -- } -+ write_fx_4_state_assignment(var, entry, fx); -+ ++count; - } -+ } -+ -+ set_u32(buffer, count_offset, count); -+} -+ -+static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset; -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ count_offset = put_u32(buffer, 0); -+ -+ write_fx_4_state_block(var, i, count_offset, fx); -+ } -+} -+ -+static void write_fx_4_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); -+ unsigned int i; -+ -+ /* FIXME: write shader blobs, once parser support works. 
*/ -+ for (i = 0; i < elements_count; ++i) -+ put_u32(buffer, 0); -+} -+ -+static void write_fx_5_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); -+ unsigned int i; - -- set_u32(buffer, count_offset, count); -+ /* FIXME: write shader blobs, once parser support works. */ -+ for (i = 0; i < elements_count; ++i) -+ { -+ put_u32(buffer, 0); /* Blob offset */ -+ put_u32(buffer, 0); /* SODecl[0] offset */ -+ put_u32(buffer, 0); /* SODecl[1] offset */ -+ put_u32(buffer, 0); /* SODecl[2] offset */ -+ put_u32(buffer, 0); /* SODecl[3] offset */ -+ put_u32(buffer, 0); /* SODecl count */ -+ put_u32(buffer, 0); /* Rasterizer stream */ -+ put_u32(buffer, 0); /* Interface bindings count */ -+ put_u32(buffer, 0); /* Interface initializer offset */ - } - } - -@@ -1719,7 +2002,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); - struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t semantic_offset, bind_point = ~0u; -- uint32_t name_offset, type_offset, i; -+ uint32_t name_offset, type_offset; - struct hlsl_ctx *ctx = fx->ctx; - - if (var->reg_reservation.reg_type) -@@ -1756,9 +2039,14 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_VERTEX_SHADER: -- /* FIXME: write shader blobs, once parser support works. */ -- for (i = 0; i < elements_count; ++i) -- put_u32(buffer, 0); -+ write_fx_4_shader_initializer(var, fx); -+ fx->shader_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ write_fx_5_shader_initializer(var, fx); - fx->shader_count += elements_count; - break; - -@@ -1782,8 +2070,8 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - break; - - default: -- hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -- type->e.numeric.type); -+ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object class %u is not implemented.", -+ type->class); - } - - write_fx_4_annotations(var->annotations, fx); -@@ -1883,6 +2171,12 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - return true; -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ if (ctx->profile->major_version < 5) -+ return false; -+ return true; - case HLSL_CLASS_UAV: - if (ctx->profile->major_version < 5) - return false; -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 8725724a239..10e12ea56f2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -18,10 +18,23 @@ - - #include "vkd3d_shader_private.h" - -+struct glsl_src -+{ -+ struct vkd3d_string_buffer *str; -+}; -+ -+struct glsl_dst -+{ -+ const struct vkd3d_shader_dst_param *vsir; -+ struct vkd3d_string_buffer *register_name; -+ struct vkd3d_string_buffer *mask; -+}; -+ - struct vkd3d_glsl_generator - { - struct vsir_program *program; -- struct vkd3d_string_buffer buffer; -+ struct vkd3d_string_buffer_cache string_buffers; -+ struct vkd3d_string_buffer *buffer; - struct vkd3d_shader_location location; - struct vkd3d_shader_message_context 
*message_context; - unsigned int indent; -@@ -45,18 +58,149 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne - vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); - } - -+static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, -+ struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) -+{ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); -+ break; -+ -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled register type %#x.", reg->type); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+} -+ -+static void shader_glsl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask) -+{ -+ const char swizzle_chars[] = "xyzw"; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "."); -+ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -+ { -+ if (mask & (VKD3DSP_WRITEMASK_0 << i)) -+ vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); -+ } -+} -+ -+static void shader_glsl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) -+{ -+ vkd3d_string_buffer_printf(buffer, "."); -+ if (write_mask & VKD3DSP_WRITEMASK_0) -+ vkd3d_string_buffer_printf(buffer, "x"); -+ if (write_mask & VKD3DSP_WRITEMASK_1) -+ vkd3d_string_buffer_printf(buffer, "y"); -+ if (write_mask & VKD3DSP_WRITEMASK_2) -+ vkd3d_string_buffer_printf(buffer, "z"); -+ if (write_mask & VKD3DSP_WRITEMASK_3) -+ vkd3d_string_buffer_printf(buffer, "w"); -+} -+ -+static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_cache *cache) -+{ -+ vkd3d_string_buffer_release(cache, src->str); -+} -+ -+static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) -+{ -+ const struct vkd3d_shader_register *reg = &vsir_src->reg; -+ -+ glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ if (reg->non_uniform) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 'non-uniform' modifer."); -+ if (vsir_src->modifiers) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); -+ -+ shader_glsl_print_register_name(glsl_src->str, gen, reg); -+ if (reg->dimension == VSIR_DIMENSION_VEC4) -+ shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); -+} -+ -+static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) -+{ -+ vkd3d_string_buffer_release(cache, dst->mask); -+ vkd3d_string_buffer_release(cache, dst->register_name); -+} -+ -+static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) -+{ -+ uint32_t write_mask = vsir_dst->write_mask; -+ -+ if (ins->flags & VKD3DSI_PRECISE_XYZW) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 'precise' modifer."); -+ if (vsir_dst->reg.non_uniform) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 'non-uniform' modifer."); -+ -+ glsl_dst->vsir = vsir_dst; -+ glsl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); -+ glsl_dst->mask = 
vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ shader_glsl_print_register_name(glsl_dst->register_name, gen, &vsir_dst->reg); -+ shader_glsl_print_write_mask(glsl_dst->mask, write_mask); -+ -+ return write_mask; -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( -+ struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) -+{ -+ va_list args; -+ -+ if (dst->vsir->shift) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); -+ if (dst->vsir->modifiers) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); -+ -+ va_start(args, format); -+ vkd3d_string_buffer_vprintf(gen->buffer, format, args); -+ va_end(args); -+ -+ vkd3d_string_buffer_printf(gen->buffer, ";\n"); -+} -+ - static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -- shader_glsl_print_indent(&gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->opcode); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "/* */\n", ins->opcode); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled instruction %#x.", ins->opcode); - } - --static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, -- const struct vkd3d_shader_instruction *ins) -+static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src, gen, &ins->src[0], mask); -+ -+ shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -- const struct vkd3d_shader_version *version = &generator->program->shader_version; -+ const struct vkd3d_shader_version *version = &gen->program->shader_version; - - /* - * TODO: Implement in_subroutine -@@ -64,45 +208,59 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - */ - if (version->major >= 4) - { -- shader_glsl_print_indent(&generator->buffer, generator->indent); -- vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "return;\n"); - } - } - --static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, -- const struct vkd3d_shader_instruction *instruction) -+static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins) - { -- generator->location = instruction->location; -+ gen->location = ins->location; - -- switch (instruction->opcode) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; -+ case VKD3DSIH_MOV: -+ shader_glsl_mov(gen, ins); -+ break; - case VKD3DSIH_RET: -- shader_glsl_ret(generator, instruction); -+ 
shader_glsl_ret(gen, ins); - break; - default: -- shader_glsl_unhandled(generator, instruction); -+ shader_glsl_unhandled(gen, ins); - break; - } - } - -+static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) -+{ -+ const struct vsir_program *program = gen->program; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ -+ if (program->temp_count) -+ vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); -+} -+ - static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) - { - const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -- struct vkd3d_string_buffer *buffer = &gen->buffer; -+ struct vkd3d_string_buffer *buffer = gen->buffer; - unsigned int i; - void *code; - -- ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); - - vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); - - vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); - -+ shader_glsl_generate_declarations(gen); -+ - vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); - - ++gen->indent; -@@ -132,7 +290,8 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc - - static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) - { -- vkd3d_string_buffer_cleanup(&gen->buffer); -+ vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); -+ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); - } - - static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, -@@ -140,7 +299,8 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - { - memset(gen, 0, sizeof(*gen)); - gen->program = program; -- vkd3d_string_buffer_init(&gen->buffer); -+ vkd3d_string_buffer_cache_init(&gen->string_buffers); -+ gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); - gen->message_context = message_context; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 1526d7b02a9..7f85195382d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -134,7 +134,7 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) - return hlsl_get_var(scope->upper, name); - } - --static void free_state_block_entry(struct hlsl_state_block_entry *entry) -+void hlsl_free_state_block_entry(struct hlsl_state_block_entry *entry) - { - unsigned int i; - -@@ -151,9 +151,9 @@ void hlsl_free_state_block(struct hlsl_state_block *state_block) - { - unsigned int k; - -- assert(state_block); -+ VKD3D_ASSERT(state_block); - for (k = 0; k < state_block->count; ++k) -- free_state_block_entry(state_block->entries[k]); -+ hlsl_free_state_block_entry(state_block->entries[k]); - vkd3d_free(state_block->entries); - vkd3d_free(state_block); - } -@@ -381,6 +381,10 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - break; - } - } -@@ -451,6 +455,10 @@ static bool type_is_single_component(const struct hlsl_type *type) - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: 
-+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - return true; - - case HLSL_CLASS_VECTOR: -@@ -482,13 +490,13 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - struct hlsl_type *type = *type_ptr; - unsigned int index = *index_ptr; - -- assert(!type_is_single_component(type)); -- assert(index < hlsl_type_component_count(type)); -+ VKD3D_ASSERT(!type_is_single_component(type)); -+ VKD3D_ASSERT(index < hlsl_type_component_count(type)); - - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- assert(index < type->dimx); -+ VKD3D_ASSERT(index < type->dimx); - *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); - *index_ptr = 0; - return index; -@@ -498,7 +506,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - unsigned int y = index / type->dimx, x = index % type->dimx; - bool row_major = hlsl_type_is_row_major(type); - -- assert(index < type->dimx * type->dimy); -+ VKD3D_ASSERT(index < type->dimx * type->dimy); - *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); - *index_ptr = row_major ? x : y; - return row_major ? y : x; -@@ -512,7 +520,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - *type_ptr = type->e.array.type; - *index_ptr = index % elem_comp_count; - array_index = index / elem_comp_count; -- assert(array_index < type->e.array.elements_count); -+ VKD3D_ASSERT(array_index < type->e.array.elements_count); - return array_index; - } - -@@ -603,7 +611,11 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -- assert(idx == 0); -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ VKD3D_ASSERT(idx == 0); - break; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -658,9 +670,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - deref->rel_offset.node = NULL; - deref->const_offset = 0; - -- assert(chain); -+ VKD3D_ASSERT(chain); - if (chain->type == HLSL_IR_INDEX) -- assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); -+ VKD3D_ASSERT(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); - - /* Find the length of the index chain */ - chain_len = 0; -@@ -707,7 +719,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - chain_len++; - ptr = index->val.node; - } -- assert(deref->path_len == load->src.path_len + chain_len); -+ VKD3D_ASSERT(deref->path_len == load->src.path_len + chain_len); - - return true; - } -@@ -717,7 +729,7 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de - struct hlsl_type *type; - unsigned int i; - -- assert(deref); -+ VKD3D_ASSERT(deref); - - if (hlsl_deref_is_lowered(deref)) - return deref->data_type; -@@ -772,7 +784,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - hlsl_src_from_node(&deref->path[deref_path_len++], c); - } - -- assert(deref_path_len == deref->path_len); -+ VKD3D_ASSERT(deref_path_len == deref->path_len); - - return true; - } -@@ -780,7 +792,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, const struct hlsl_type *type, - struct hlsl_ir_node *idx) - { -- assert(idx); -+ 
VKD3D_ASSERT(idx); - - switch (type->class) - { -@@ -800,7 +812,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - { - struct hlsl_ir_constant *c = hlsl_ir_constant(idx); - -- assert(c->value.u[0].u < type->e.record.field_count); -+ VKD3D_ASSERT(c->value.u[0].u < type->e.record.field_count); - return type->e.record.fields[c->value.u[0].u].type; - } - -@@ -1003,6 +1015,10 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - return 1; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -1090,6 +1106,10 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - case HLSL_CLASS_STRING: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - return true; - } - -@@ -1310,7 +1330,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc - if (!other) - return true; - -- assert(!hlsl_deref_is_lowered(other)); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(other)); - - if (!init_deref(ctx, deref, other->var, other->path_len)) - return false; -@@ -1367,8 +1387,8 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls - struct hlsl_ir_store *store; - unsigned int i; - -- assert(lhs); -- assert(!hlsl_deref_is_lowered(lhs)); -+ VKD3D_ASSERT(lhs); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); - - if (!(store = hlsl_alloc(ctx, sizeof(*store)))) - return NULL; -@@ -1439,7 +1459,7 @@ struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *t - { - struct hlsl_ir_constant *c; - -- assert(type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR); - - if (!(c = hlsl_alloc(ctx, sizeof(*c)))) - return NULL; -@@ -1484,6 +1504,24 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n - return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); - } - -+struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_string_constant *s; -+ -+ if (!(s = hlsl_alloc(ctx, sizeof(*s)))) -+ return NULL; -+ -+ init_node(&s->node, HLSL_IR_STRING_CONSTANT, ctx->builtin_types.string, loc); -+ -+ if (!(s->string = hlsl_strdup(ctx, str))) -+ { -+ hlsl_free_instr(&s->node); -+ return NULL; -+ } -+ return &s->node; -+} -+ - struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], - struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) -@@ -1513,7 +1551,7 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - -- assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -@@ -1522,8 +1560,8 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; - -- assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -- 
assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -@@ -1585,7 +1623,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl - struct hlsl_type *type; - unsigned int i; - -- assert(!hlsl_deref_is_lowered(deref)); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); - - type = hlsl_deref_get_type(ctx, deref); - if (idx) -@@ -1614,7 +1652,7 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls - /* This deref can only exists temporarily because it is not the real owner of its members. */ - struct hlsl_deref tmp_deref; - -- assert(deref->path_len >= 1); -+ VKD3D_ASSERT(deref->path_len >= 1); - - tmp_deref = *deref; - tmp_deref.path_len = deref->path_len - 1; -@@ -1719,7 +1757,7 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - - if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) - return NULL; -- assert(hlsl_is_numeric_type(val->data_type)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); - if (components == 1) - type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); - else -@@ -1893,7 +1931,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, - { - unsigned int i; - -- assert(!hlsl_deref_is_lowered(src)); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(src)); - - if (!init_deref(ctx, dst, src->var, src->path_len)) - return false; -@@ -2039,6 +2077,11 @@ static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, - return &dst->node; - } - -+static struct hlsl_ir_node *clone_string_constant(struct hlsl_ctx *ctx, struct hlsl_ir_string_constant *src) -+{ -+ return hlsl_new_string_constant(ctx, src->string, &src->node.loc); -+} -+ - static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src) - { - struct hlsl_ir_store *dst; -@@ -2081,6 +2124,43 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, - return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); - } - -+struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, -+ struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -+ unsigned int lhs_index, unsigned int arg_index) -+{ -+ struct hlsl_state_block_entry *entry; -+ struct clone_instr_map map = { 0 }; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ return NULL; -+ entry->name = hlsl_strdup(ctx, name); -+ entry->lhs_has_index = lhs_has_index; -+ entry->lhs_index = lhs_index; -+ if (!(entry->instrs = hlsl_alloc(ctx, sizeof(*entry->instrs)))) -+ { -+ hlsl_free_state_block_entry(entry); -+ return NULL; -+ } -+ -+ entry->args_count = 1; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ { -+ hlsl_free_state_block_entry(entry); -+ return NULL; -+ } -+ -+ hlsl_block_init(entry->instrs); -+ if (!clone_block(ctx, entry->instrs, src->instrs, &map)) -+ { -+ hlsl_free_state_block_entry(entry); -+ return NULL; -+ } -+ clone_src(&map, entry->args, &src->args[arg_index]); -+ vkd3d_free(map.instrs); -+ -+ return entry; -+} -+ - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) - { - hlsl_block_cleanup(&c->body); -@@ -2168,6 +2248,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - case HLSL_IR_RESOURCE_STORE: - return 
clone_resource_store(ctx, map, hlsl_ir_resource_store(instr)); - -+ case HLSL_IR_STRING_CONSTANT: -+ return clone_string_constant(ctx, hlsl_ir_string_constant(instr)); -+ - case HLSL_IR_STORE: - return clone_store(ctx, map, hlsl_ir_store(instr)); - -@@ -2296,7 +2379,7 @@ void hlsl_pop_scope(struct hlsl_ctx *ctx) - { - struct hlsl_scope *prev_scope = ctx->cur_scope->upper; - -- assert(prev_scope); -+ VKD3D_ASSERT(prev_scope); - TRACE("Popping current scope.\n"); - ctx->cur_scope = prev_scope; - } -@@ -2374,17 +2457,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - switch (type->class) - { - case HLSL_CLASS_SCALAR: -- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); - return string; - - case HLSL_CLASS_VECTOR: -- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; - - case HLSL_CLASS_MATRIX: -- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; - -@@ -2422,15 +2505,15 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - } - -- assert(hlsl_is_numeric_type(type->e.resource.format)); -- assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); -+ VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - { - vkd3d_string_buffer_printf(string, "Buffer"); - } - else - { -- assert(type->sampler_dim < ARRAY_SIZE(dimensions)); -+ VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); - vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); - } - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -@@ -2475,6 +2558,10 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - break; - } - -@@ -2571,19 +2658,21 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - { - static const char * const names[] = - { -- [HLSL_IR_CALL ] = "HLSL_IR_CALL", -- [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -- [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -- [HLSL_IR_IF ] = "HLSL_IR_IF", -- [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -- [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -- [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -- [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -- [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -- [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", -- [HLSL_IR_STORE ] = "HLSL_IR_STORE", -- [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", -- [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", -+ [HLSL_IR_CALL ] = "HLSL_IR_CALL", -+ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -+ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -+ [HLSL_IR_IF ] = "HLSL_IR_IF", -+ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -+ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -+ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -+ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -+ 
[HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -+ [HLSL_IR_RESOURCE_STORE ] = "HLSL_IR_RESOURCE_STORE", -+ [HLSL_IR_STRING_CONSTANT] = "HLSL_IR_STRING_CONSTANT", -+ [HLSL_IR_STORE ] = "HLSL_IR_STORE", -+ [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", -+ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", -+ [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -2602,7 +2691,7 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", - }; - -- assert(type < ARRAY_SIZE(names)); -+ VKD3D_ASSERT(type < ARRAY_SIZE(names)); - return names[type]; - } - -@@ -2692,7 +2781,7 @@ const char *debug_hlsl_writemask(unsigned int writemask) - char string[5]; - unsigned int i = 0, pos = 0; - -- assert(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); -+ VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); - - while (writemask) - { -@@ -2711,7 +2800,7 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) - char string[5]; - unsigned int i; - -- assert(size <= ARRAY_SIZE(components)); -+ VKD3D_ASSERT(size <= ARRAY_SIZE(components)); - for (i = 0; i < size; ++i) - string[i] = components[hlsl_swizzle_get_component(swizzle, i)]; - string[size] = 0; -@@ -2808,6 +2897,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_DSY_COARSE] = "dsy_coarse", - [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", -+ [HLSL_OP1_F16TOF32] = "f16tof32", - [HLSL_OP1_FLOOR] = "floor", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -@@ -2935,7 +3025,7 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - [HLSL_RESOURCE_RESINFO] = "resinfo", - }; - -- assert(load->load_type < ARRAY_SIZE(type_names)); -+ VKD3D_ASSERT(load->load_type < ARRAY_SIZE(type_names)); - vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[load->load_type]); - dump_deref(buffer, &load->resource); - vkd3d_string_buffer_printf(buffer, ", sampler = "); -@@ -2989,6 +3079,11 @@ static void dump_ir_resource_store(struct vkd3d_string_buffer *buffer, const str - vkd3d_string_buffer_printf(buffer, ")"); - } - -+static void dump_ir_string(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_string_constant *string) -+{ -+ vkd3d_string_buffer_printf(buffer, "\"%s\"", debugstr_a(string->string)); -+} -+ - static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) - { - vkd3d_string_buffer_printf(buffer, "= ("); -@@ -3108,6 +3203,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - dump_ir_resource_store(buffer, hlsl_ir_resource_store(instr)); - break; - -+ case HLSL_IR_STRING_CONSTANT: -+ dump_ir_string(buffer, hlsl_ir_string_constant(instr)); -+ break; -+ - case HLSL_IR_STORE: - dump_ir_store(buffer, hlsl_ir_store(instr)); - break; -@@ -3177,8 +3276,8 @@ void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) - { - struct hlsl_src *src, *next; - -- assert(old->data_type->dimx == new->data_type->dimx); -- assert(old->data_type->dimy == new->data_type->dimy); -+ VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); -+ VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy); - - LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) - { -@@ -3286,6 +3385,12 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) - vkd3d_free(load); - } - -+static void free_ir_string_constant(struct hlsl_ir_string_constant *string) -+{ -+ vkd3d_free(string->string); -+ 
vkd3d_free(string); -+} -+ - static void free_ir_resource_store(struct hlsl_ir_resource_store *store) - { - hlsl_cleanup_deref(&store->resource); -@@ -3330,7 +3435,7 @@ static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *cons - - void hlsl_free_instr(struct hlsl_ir_node *node) - { -- assert(list_empty(&node->uses)); -+ VKD3D_ASSERT(list_empty(&node->uses)); - - switch (node->type) - { -@@ -3370,6 +3475,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - free_ir_resource_load(hlsl_ir_resource_load(node)); - break; - -+ case HLSL_IR_STRING_CONSTANT: -+ free_ir_string_constant(hlsl_ir_string_constant(node)); -+ break; -+ - case HLSL_IR_RESOURCE_STORE: - free_ir_resource_store(hlsl_ir_resource_store(node)); - break; -@@ -3813,6 +3922,7 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - ctx->builtin_types.sampler[bt] = type; - } - -+ ctx->builtin_types.string = hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING); - ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); -@@ -3821,9 +3931,12 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); -- hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "ComputeShader", HLSL_CLASS_COMPUTE_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DomainShader", HLSL_CLASS_DOMAIN_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "HullShader", HLSL_CLASS_HULL_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "GeometryShader", HLSL_CLASS_GEOMETRY_SHADER)); - - for (i = 0; i < ARRAY_SIZE(effect_types); ++i) - { -@@ -4154,6 +4267,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct - /* Save and restore everything that matters. - * Note that saving the scope stack is hard, and shouldn't be necessary. 
*/ - -+ hlsl_push_scope(ctx); - ctx->scanner = NULL; - ctx->internal_func_name = internal_name->buffer; - ctx->cur_function = NULL; -@@ -4161,6 +4275,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct - ctx->scanner = saved_scanner; - ctx->internal_func_name = saved_internal_func_name; - ctx->cur_function = saved_cur_function; -+ hlsl_pop_scope(ctx); - if (ret) - { - ERR("Failed to compile intrinsic, error %u.\n", ret); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 4411546e269..7e8cd774ae2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -91,6 +91,10 @@ enum hlsl_type_class - HLSL_CLASS_TEXTURE, - HLSL_CLASS_UAV, - HLSL_CLASS_VERTEX_SHADER, -+ HLSL_CLASS_COMPUTE_SHADER, -+ HLSL_CLASS_DOMAIN_SHADER, -+ HLSL_CLASS_HULL_SHADER, -+ HLSL_CLASS_GEOMETRY_SHADER, - HLSL_CLASS_CONSTANT_BUFFER, - HLSL_CLASS_VOID, - }; -@@ -306,6 +310,7 @@ enum hlsl_ir_node_type - HLSL_IR_JUMP, - HLSL_IR_RESOURCE_LOAD, - HLSL_IR_RESOURCE_STORE, -+ HLSL_IR_STRING_CONSTANT, - HLSL_IR_STORE, - HLSL_IR_SWIZZLE, - HLSL_IR_SWITCH, -@@ -653,6 +658,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_DSY_COARSE, - HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, -+ HLSL_OP1_F16TOF32, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, - HLSL_OP1_LOG2, -@@ -838,6 +844,12 @@ struct hlsl_ir_constant - struct hlsl_reg reg; - }; - -+struct hlsl_ir_string_constant -+{ -+ struct hlsl_ir_node node; -+ char *string; -+}; -+ - /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, - * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ - struct hlsl_ir_stateblock_constant -@@ -984,6 +996,7 @@ struct hlsl_ctx - /* matrix[HLSL_TYPE_FLOAT][1][3] is a float4x2, i.e. dimx = 2, dimy = 4 */ - struct hlsl_type *matrix[HLSL_TYPE_LAST_SCALAR + 1][4][4]; - struct hlsl_type *sampler[HLSL_SAMPLER_DIM_LAST_SAMPLER + 1]; -+ struct hlsl_type *string; - struct hlsl_type *Void; - } builtin_types; - -@@ -1001,6 +1014,8 @@ struct hlsl_ctx - } *regs; - size_t count, size; - } constant_defs; -+ /* 'c' registers where the constants expected by SM2 sincos are stored. */ -+ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register - * index that will be used in the output bytecode (+1). 
*/ - uint32_t temp_count; -@@ -1047,85 +1062,91 @@ struct hlsl_resource_load_params - - static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_CALL); -+ VKD3D_ASSERT(node->type == HLSL_IR_CALL); - return CONTAINING_RECORD(node, struct hlsl_ir_call, node); - } - - static inline struct hlsl_ir_constant *hlsl_ir_constant(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(node->type == HLSL_IR_CONSTANT); - return CONTAINING_RECORD(node, struct hlsl_ir_constant, node); - } - -+static inline struct hlsl_ir_string_constant *hlsl_ir_string_constant(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_STRING_CONSTANT); -+ return CONTAINING_RECORD(node, struct hlsl_ir_string_constant, node); -+} -+ - static inline struct hlsl_ir_expr *hlsl_ir_expr(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_EXPR); -+ VKD3D_ASSERT(node->type == HLSL_IR_EXPR); - return CONTAINING_RECORD(node, struct hlsl_ir_expr, node); - } - - static inline struct hlsl_ir_if *hlsl_ir_if(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_IF); -+ VKD3D_ASSERT(node->type == HLSL_IR_IF); - return CONTAINING_RECORD(node, struct hlsl_ir_if, node); - } - - static inline struct hlsl_ir_jump *hlsl_ir_jump(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_JUMP); -+ VKD3D_ASSERT(node->type == HLSL_IR_JUMP); - return CONTAINING_RECORD(node, struct hlsl_ir_jump, node); - } - - static inline struct hlsl_ir_load *hlsl_ir_load(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_LOAD); -+ VKD3D_ASSERT(node->type == HLSL_IR_LOAD); - return CONTAINING_RECORD(node, struct hlsl_ir_load, node); - } - - static inline struct hlsl_ir_loop *hlsl_ir_loop(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_LOOP); -+ VKD3D_ASSERT(node->type == HLSL_IR_LOOP); - return CONTAINING_RECORD(node, struct hlsl_ir_loop, node); - } - - static inline struct hlsl_ir_resource_load *hlsl_ir_resource_load(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_RESOURCE_LOAD); -+ VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_LOAD); - return CONTAINING_RECORD(node, struct hlsl_ir_resource_load, node); - } - - static inline struct hlsl_ir_resource_store *hlsl_ir_resource_store(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_RESOURCE_STORE); -+ VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_STORE); - return CONTAINING_RECORD(node, struct hlsl_ir_resource_store, node); - } - - static inline struct hlsl_ir_store *hlsl_ir_store(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_STORE); -+ VKD3D_ASSERT(node->type == HLSL_IR_STORE); - return CONTAINING_RECORD(node, struct hlsl_ir_store, node); - } - - static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_SWIZZLE); -+ VKD3D_ASSERT(node->type == HLSL_IR_SWIZZLE); - return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); - } - - static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_INDEX); -+ VKD3D_ASSERT(node->type == HLSL_IR_INDEX); - return CONTAINING_RECORD(node, struct hlsl_ir_index, node); - } - - static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_SWITCH); -+ VKD3D_ASSERT(node->type == HLSL_IR_SWITCH); - return CONTAINING_RECORD(node, struct hlsl_ir_switch, 
node); - } - - static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); -+ VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); - return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); - } - -@@ -1306,6 +1327,9 @@ void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); - - bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, - const struct vkd3d_shader_location *loc); -+struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, -+ struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -+ unsigned int lhs_index, unsigned int arg_index); - - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -@@ -1329,6 +1353,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr); - void hlsl_free_instr(struct hlsl_ir_node *node); - void hlsl_free_instr_list(struct list *list); - void hlsl_free_state_block(struct hlsl_state_block *state_block); -+void hlsl_free_state_block_entry(struct hlsl_state_block_entry *state_block_entry); - void hlsl_free_type(struct hlsl_type *type); - void hlsl_free_var(struct hlsl_ir_var *decl); - -@@ -1412,6 +1437,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, - struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -+ const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index 55993dac2b4..b4db142f6c2 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -29,6 +29,8 @@ - - static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); - -+static void apply_escape_sequences(char *str); -+ - #define YY_USER_ACTION update_location(yyget_extra(yyscanner), yyget_lloc(yyscanner)); - - %} -@@ -53,7 +55,7 @@ RESERVED4 typename|union|using|virtual - - WS [ \t] - NEWLINE (\n)|(\r\n) --STRING \"[^\"]*\" -+STRING \"([^\"\\]|\\.)*\" - IDENTIFIER [A-Za-z_][A-Za-z0-9_]* - - ANY (.) -@@ -208,6 +210,16 @@ while {return KW_WHILE; } - return NEW_IDENTIFIER; - } - -+{STRING} { -+ struct hlsl_ctx *ctx = yyget_extra(yyscanner); -+ char *string = hlsl_strdup(ctx, yytext + 1); -+ -+ string[strlen(string) - 1] = 0; -+ apply_escape_sequences(string); -+ yylval->name = string; -+ return STRING; -+ } -+ - [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[h|H|f|F]? 
{ - yylval->floatval = atof(yytext); - return C_FLOAT; -@@ -292,6 +304,7 @@ while {return KW_WHILE; } - - BEGIN(pp_ignore); - string[strlen(string) - 1] = 0; -+ apply_escape_sequences(string); - yylval->name = string; - return STRING; - } -@@ -341,3 +354,115 @@ int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hls - yylex_destroy(ctx->scanner); - return ret; - } -+ -+static void apply_escape_sequences(char *str) -+{ -+ unsigned int i = 0, k = 0, r; -+ -+ while (str[i]) -+ { -+ unsigned char v = 0; -+ -+ if (str[i] != '\\') -+ { -+ str[k++] = str[i]; -+ ++i; -+ continue; -+ } -+ -+ ++i; -+ VKD3D_ASSERT(str[i]); -+ -+ if ('0' <= str[i] && str[i] <= '7') -+ { -+ /* octal, up to 3 digits. */ -+ for (r = 0; r < 3; ++r) -+ { -+ char c = str[i]; -+ -+ if ('0' <= c && c <= '7') -+ { -+ v = v << 3; -+ v += c - '0'; -+ ++i; -+ } -+ else -+ break; -+ } -+ str[k++] = v; -+ continue; -+ } -+ -+ if (str[i] == 'x') -+ { -+ bool number = false; -+ -+ /* hexadecimal */ -+ ++i; -+ while (1) -+ { -+ char c = str[i]; -+ -+ if ('0' <= c && c <= '9') -+ { -+ v = v << 4; -+ v += c - '0'; -+ number = true; -+ ++i; -+ } -+ else if ('a' <= c && c <= 'f') -+ { -+ v = v << 4; -+ v += c - 'a' + 10; -+ number = true; -+ ++i; -+ } -+ else if ('A' <= c && c <= 'F') -+ { -+ v = v << 4; -+ v += c - 'A' + 10; -+ number = true; -+ ++i; -+ } -+ else -+ break; -+ } -+ if (number) -+ str[k++] = v; -+ else -+ str[k++] = 'x'; -+ continue; -+ } -+ -+ switch (str[i]) -+ { -+ case 'a': -+ str[k++] = '\a'; -+ break; -+ case 'b': -+ str[k++] = '\b'; -+ break; -+ case 'f': -+ str[k++] = '\f'; -+ break; -+ case 'n': -+ str[k++] = '\n'; -+ break; -+ case 'r': -+ str[k++] = '\r'; -+ break; -+ case 't': -+ str[k++] = '\t'; -+ break; -+ case 'v': -+ str[k++] = '\v'; -+ break; -+ -+ default: -+ str[k++] = str[i]; -+ break; -+ } -+ ++i; -+ } -+ str[k++] = '\0'; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 312eaec8a73..30bd53d0c49 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -344,11 +344,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl - broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; - matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; -- assert(src_comp_count >= dst_comp_count || broadcast); -+ VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); - if (matrix_cast) - { -- assert(dst_type->dimx <= src_type->dimx); -- assert(dst_type->dimy <= src_type->dimy); -+ VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); -+ VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); - } - - if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) -@@ -590,6 +590,7 @@ static union hlsl_constant_value_component evaluate_static_expression(struct hls - { - case HLSL_IR_CONSTANT: - case HLSL_IR_EXPR: -+ case HLSL_IR_STRING_CONSTANT: - case HLSL_IR_SWIZZLE: - case HLSL_IR_LOAD: - case HLSL_IR_INDEX: -@@ -633,6 +634,10 @@ static union hlsl_constant_value_component evaluate_static_expression(struct hls - constant = hlsl_ir_constant(node); - ret = constant->value.u[0]; - } -+ else if (node->type == HLSL_IR_STRING_CONSTANT) -+ { -+ hlsl_fixme(ctx, &node->loc, "Evaluate string constants as static expressions."); -+ } - else - { - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -@@ -755,7 +760,7 @@ static void 
cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) - { - unsigned int i = 0; - -- assert(attr_list); -+ VKD3D_ASSERT(attr_list); - for (i = 0; i < attr_list->count; ++i) - hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); - vkd3d_free(attr_list->attrs); -@@ -915,7 +920,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, st - { - struct hlsl_ir_node *index, *c; - -- assert(idx < record->data_type->e.record.field_count); -+ VKD3D_ASSERT(idx < record->data_type->e.record.field_count); - - if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) - return false; -@@ -1045,7 +1050,7 @@ static void free_parse_variable_def(struct parse_variable_def *v) - vkd3d_free(v->arrays.sizes); - vkd3d_free(v->name); - hlsl_cleanup_semantic(&v->semantic); -- assert(!v->state_blocks); -+ VKD3D_ASSERT(!v->state_blocks); - vkd3d_free(v); - } - -@@ -1056,7 +1061,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - size_t i = 0; - - if (type->class == HLSL_CLASS_MATRIX) -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - memset(fields, 0, sizeof(*fields)); - fields->count = list_count(defs); -@@ -1199,7 +1204,7 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters - struct hlsl_ir_var *var; - - if (param->type->class == HLSL_CLASS_MATRIX) -- assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -@@ -2028,7 +2033,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - { - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - -- assert(op); -+ VKD3D_ASSERT(op); - if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return NULL; - } -@@ -2055,7 +2060,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - struct hlsl_ir_node *new_swizzle; - uint32_t s = swizzle->swizzle; - -- assert(!matrix_writemask); -+ VKD3D_ASSERT(!matrix_writemask); - - if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) - { -@@ -2111,7 +2116,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - return NULL; - - resource_type = hlsl_deref_get_type(ctx, &resource_deref); -- assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); -+ VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); - - if (resource_type->class != HLSL_CLASS_UAV) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -@@ -2123,9 +2128,9 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - -- assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -- assert(coords->data_type->dimx == dim_count); -+ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(coords->data_type->dimx == dim_count); - - if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) - { -@@ -2177,7 
+2182,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - struct hlsl_ir_node *mat = row->val.node; - unsigned int i, k = 0; - -- assert(!matrix_writemask); -+ VKD3D_ASSERT(!matrix_writemask); - - for (i = 0; i < mat->data_type->dimx; ++i) - { -@@ -2450,10 +2455,10 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - char *var_name; - unsigned int i; - -- assert(basic_type); -+ VKD3D_ASSERT(basic_type); - - if (basic_type->class == HLSL_CLASS_MATRIX) -- assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - type = basic_type; - -@@ -2858,7 +2863,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - struct hlsl_ir_node *call; - unsigned int i, j; - -- assert(args->args_count <= func->parameters.count); -+ VKD3D_ASSERT(args->args_count <= func->parameters.count); - - for (i = 0; i < args->args_count; ++i) - { -@@ -2892,7 +2897,7 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - unsigned int comp_count = hlsl_type_component_count(param->data_type); - struct hlsl_deref param_deref; - -- assert(param->default_values); -+ VKD3D_ASSERT(param->default_values); - - hlsl_init_simple_deref_from_var(¶m_deref, param); - -@@ -3065,6 +3070,19 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - return convert_args(ctx, params, type, loc); - } - -+static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ return false; -+ -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ -+ return convert_args(ctx, params, type, loc); -+} -+ - static bool intrinsic_abs(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3770,6 +3788,21 @@ static bool intrinsic_faceforward(struct hlsl_ctx *ctx, - return add_user_call(ctx, func, params, loc); - } - -+static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *type; -+ -+ if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) -+ return false; -+ -+ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); -+} -+ - static bool intrinsic_floor(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4063,12 +4096,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - } - else if (vect_count == 1) - { -- assert(matrix_type->dimx == 1 || matrix_type->dimy == 1); -+ VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); - ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); - } - else - { -- assert(matrix_type->dimx == 1 && matrix_type->dimy == 1); -+ VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); - ret_type = hlsl_get_scalar_type(ctx, base); - } - -@@ -4242,7 +4275,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - return false; - } - -- assert(params->args_count == 3); -+ VKD3D_ASSERT(params->args_count == 
3); - mut_params = *params; - mut_params.args_count = 2; - if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) -@@ -4870,6 +4903,7 @@ intrinsic_functions[] = - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, -+ {"f16tof32", 1, true, intrinsic_f16tof32}, - {"faceforward", 3, true, intrinsic_faceforward}, - {"floor", 1, true, intrinsic_floor}, - {"fmod", 2, true, intrinsic_fmod}, -@@ -5101,7 +5135,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - common_type = first->data_type; - } - -- assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - args[0] = cond; - args[1] = first; -@@ -6601,7 +6635,7 @@ func_declaration: - - if (!$1.first) - { -- assert(decl->parameters.count == $1.parameters.count); -+ VKD3D_ASSERT(decl->parameters.count == $1.parameters.count); - - for (i = 0; i < $1.parameters.count; ++i) - { -@@ -6716,7 +6750,7 @@ func_prototype_no_attrs: - * brittle and ugly. - */ - -- assert($5.count == params->count); -+ VKD3D_ASSERT($5.count == params->count); - for (i = 0; i < params->count; ++i) - { - struct hlsl_ir_var *orig_param = params->vars[i]; -@@ -7304,6 +7338,10 @@ type_no_void: - validate_uav_type(ctx, $1, $3, &@3); - $$ = hlsl_new_uav_type(ctx, $1, $3, true); - } -+ | KW_STRING -+ { -+ $$ = ctx->builtin_types.string; -+ } - | TYPE_IDENTIFIER - { - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); -@@ -7368,6 +7406,22 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); - } -+ | KW_COMPUTESHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "ComputeShader", true, true); -+ } -+ | KW_DOMAINSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "DomainShader", true, true); -+ } -+ | KW_HULLSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "HullShader", true, true); -+ } -+ | KW_GEOMETRYSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); -+ } - | KW_CONSTANTBUFFER '<' type '>' - { - if ($3->class != HLSL_CLASS_STRUCT) -@@ -7483,10 +7537,10 @@ variables_def_typed: - { - struct parse_variable_def *head_def; - -- assert(!list_empty($1)); -+ VKD3D_ASSERT(!list_empty($1)); - head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); - -- assert(head_def->basic_type); -+ VKD3D_ASSERT(head_def->basic_type); - $3->basic_type = head_def->basic_type; - $3->modifiers = head_def->modifiers; - $3->modifiers_loc = head_def->modifiers_loc; -@@ -8252,6 +8306,23 @@ primary_expr: - YYABORT; - } - } -+ | STRING -+ { -+ struct hlsl_ir_node *c; -+ -+ if (!(c = hlsl_new_string_constant(ctx, $1, &@1))) -+ { -+ vkd3d_free($1); -+ YYABORT; -+ } -+ vkd3d_free($1); -+ -+ if (!($$ = make_block(ctx, c))) -+ { -+ hlsl_free_instr(c); -+ YYABORT; -+ } -+ } - | VAR_IDENTIFIER - { - struct hlsl_ir_load *load; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index 26386c0b8df..049461cdb7d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -53,7 +53,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - - if (regset == HLSL_REGSET_NUMERIC) - { -- assert(size % 4 == 0); -+ VKD3D_ASSERT(size % 4 == 0); - size /= 4; - } - -@@ -76,7 +76,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - - if (regset == HLSL_REGSET_NUMERIC) - { -- assert(*offset_component == 0); -+ 
VKD3D_ASSERT(*offset_component == 0); - *offset_component = field_offset % 4; - field_offset /= 4; - } -@@ -121,7 +121,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - return NULL; - hlsl_block_add_instr(block, offset); - -- assert(deref->var); -+ VKD3D_ASSERT(deref->var); - type = deref->var->data_type; - - for (i = 0; i < deref->path_len; ++i) -@@ -154,8 +154,8 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der - struct hlsl_block block; - struct hlsl_type *type; - -- assert(deref->var); -- assert(!hlsl_deref_is_lowered(deref)); -+ VKD3D_ASSERT(deref->var); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); - - type = hlsl_deref_get_type(ctx, deref); - -@@ -222,8 +222,8 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, - if (temp->default_values) - { - /* Transfer default values from the temp to the uniform. */ -- assert(!uniform->default_values); -- assert(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); -+ VKD3D_ASSERT(!uniform->default_values); -+ VKD3D_ASSERT(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); - uniform->default_values = temp->default_values; - temp->default_values = NULL; - } -@@ -399,7 +399,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - } - else - { -- assert(i == 0); -+ VKD3D_ASSERT(i == 0); - - if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) - return; -@@ -532,7 +532,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - } - else - { -- assert(i == 0); -+ VKD3D_ASSERT(i == 0); - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) - return; -@@ -927,7 +927,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - if (return_instr) - { - /* If we're in a loop, we should have used "break" instead. */ -- assert(!in_loop); -+ VKD3D_ASSERT(!in_loop); - - /* Iterate in reverse, to avoid use-after-free when unlinking sources from - * the "uses" list. */ -@@ -949,7 +949,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - struct hlsl_ir_load *load; - - /* If we're in a loop, we should have used "break" instead. 
*/ -- assert(!in_loop); -+ VKD3D_ASSERT(!in_loop); - - if (tail == &cf_instr->entry) - return has_early_return; -@@ -1008,7 +1008,7 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h - struct hlsl_deref coords_deref; - struct hlsl_ir_var *coords; - -- assert(dim_count < 4); -+ VKD3D_ASSERT(dim_count < 4); - - if (!(coords = hlsl_new_synthetic_var(ctx, "coords", - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) -@@ -1109,9 +1109,9 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_resource_load_params params = {0}; - struct hlsl_ir_node *resource_load; - -- assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -- assert(coords->data_type->dimx == dim_count); -+ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(coords->data_type->dimx == dim_count); - - if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) - return false; -@@ -1141,7 +1141,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_deref row_deref; - unsigned int i; - -- assert(!hlsl_type_is_row_major(mat->data_type)); -+ VKD3D_ASSERT(!hlsl_type_is_row_major(mat->data_type)); - - if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) - return false; -@@ -1378,7 +1378,7 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co - unsigned int component_count = hlsl_type_component_count(var->data_type); - struct copy_propagation_value *value; - -- assert(component < component_count); -+ VKD3D_ASSERT(component < component_count); - value = copy_propagation_get_value_at_time(&var_def->traces[component], time); - - if (!value) -@@ -1411,7 +1411,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h - var_def->var = var; - - res = rb_put(&state->var_defs, var, &var_def->entry); -- assert(!res); -+ VKD3D_ASSERT(!res); - - return var_def; - } -@@ -1420,7 +1420,7 @@ static void copy_propagation_trace_record_value(struct hlsl_ctx *ctx, - struct copy_propagation_component_trace *trace, struct hlsl_ir_node *node, - unsigned int component, unsigned int time) - { -- assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); -+ VKD3D_ASSERT(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); - - if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity, - trace->record_count + 1, sizeof(trace->records[0]))) -@@ -1449,7 +1449,7 @@ static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct co - /* Don't add an invalidate record if it is already present. 
*/ - if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time) - { -- assert(!trace->records[trace->record_count - 1].node); -+ VKD3D_ASSERT(!trace->records[trace->record_count - 1].node); - continue; - } - -@@ -1638,9 +1638,16 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_GEOMETRY_SHADER: - break; - - case HLSL_CLASS_MATRIX: -@@ -1651,11 +1658,8 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - * matrices yet. */ - return false; - -- case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -- case HLSL_CLASS_RENDER_TARGET_VIEW: -- case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - vkd3d_unreachable(); -@@ -1697,11 +1701,11 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, - - if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) - return false; -- assert(count == 1); -+ VKD3D_ASSERT(count == 1); - - if (!(value = copy_propagation_get_value(state, deref->var, start, time))) - return false; -- assert(value->component == 0); -+ VKD3D_ASSERT(value->component == 0); - - /* Only HLSL_IR_LOAD can produce an object. */ - load = hlsl_ir_load(value->node); -@@ -2000,12 +2004,12 @@ static enum validation_result validate_component_index_range_from_deref(struct h - struct hlsl_ir_node *path_node = deref->path[i].node; - unsigned int idx = 0; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - return DEREF_VALIDATION_NOT_CONSTANT; - - /* We should always have generated a cast to UINT. */ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; -@@ -2061,7 +2065,7 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct - { - struct hlsl_ir_node *path_node = deref->path[i].node; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - hlsl_note(ctx, &path_node->loc, VKD3D_SHADER_LOG_ERROR, - "Expression for %s within \"%s\" cannot be resolved statically.", -@@ -2535,7 +2539,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - return false; - - deref = &hlsl_ir_load(instr)->src; -- assert(deref->var); -+ VKD3D_ASSERT(deref->var); - - if (deref->path_len == 0) - return false; -@@ -2609,7 +2613,7 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc - return false; - - deref = &hlsl_ir_store(instr)->lhs; -- assert(deref->var); -+ VKD3D_ASSERT(deref->var); - - if (deref->path_len == 0) - return false; -@@ -2630,6 +2634,124 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc - return false; - } - -+/* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant -+ * index into multiple constant loads, where the value of only one of them ends up in the resulting -+ * node. 
-+ * This is achieved through a synthetic variable. The non-constant index is compared for equality -+ * with every possible value it can have within the array bounds, and the ternary operator is used -+ * to update the value of the synthetic var when the equality check passes. */ -+static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+ struct hlsl_block *block) -+{ -+ struct hlsl_constant_value zero_value = {0}; -+ struct hlsl_ir_node *cut_index, *zero, *store; -+ unsigned int i, i_cut, element_count; -+ const struct hlsl_deref *deref; -+ struct hlsl_type *cut_type; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_var *var; -+ bool row_major; -+ -+ if (instr->type != HLSL_IR_LOAD) -+ return false; -+ load = hlsl_ir_load(instr); -+ deref = &load->src; -+ -+ if (deref->path_len == 0) -+ return false; -+ -+ for (i = deref->path_len - 1; ; --i) -+ { -+ if (deref->path[i].node->type != HLSL_IR_CONSTANT) -+ { -+ i_cut = i; -+ break; -+ } -+ -+ if (i == 0) -+ return false; -+ } -+ -+ cut_index = deref->path[i_cut].node; -+ cut_type = deref->var->data_type; -+ for (i = 0; i < i_cut; ++i) -+ cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node); -+ -+ row_major = hlsl_type_is_row_major(cut_type); -+ VKD3D_ASSERT(cut_type->class == HLSL_CLASS_ARRAY || row_major); -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, row_major ? "row_major-load" : "array-load", instr->data_type, &instr->loc))) -+ return false; -+ -+ if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ if (!(store = hlsl_new_simple_store(ctx, var, zero))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ TRACE("Lowering non-constant %s load on variable '%s'.\n", row_major ? 
"row_major" : "array", deref->var->name); -+ -+ element_count = hlsl_type_element_count(cut_type); -+ for (i = 0; i < element_count; ++i) -+ { -+ struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *const_i, *equals, *ternary, *var_store; -+ struct hlsl_ir_load *var_load, *specific_load; -+ struct hlsl_deref deref_copy = {0}; -+ -+ if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, const_i); -+ -+ operands[0] = cut_index; -+ operands[1] = const_i; -+ if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, equals); -+ -+ if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, equals); -+ -+ if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, &var_load->node); -+ -+ if (!hlsl_copy_deref(ctx, &deref_copy, deref)) -+ return false; -+ hlsl_src_remove(&deref_copy.path[i_cut]); -+ hlsl_src_from_node(&deref_copy.path[i_cut], const_i); -+ -+ if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc))) -+ { -+ hlsl_cleanup_deref(&deref_copy); -+ return false; -+ } -+ hlsl_block_add_instr(block, &specific_load->node); -+ -+ hlsl_cleanup_deref(&deref_copy); -+ -+ operands[0] = equals; -+ operands[1] = &specific_load->node; -+ operands[2] = &var_load->node; -+ if (!(ternary = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, ternary); -+ -+ if (!(var_store = hlsl_new_simple_store(ctx, var, ternary))) -+ return false; -+ hlsl_block_add_instr(block, var_store); -+ } -+ -+ if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, &load->node); -+ -+ return true; -+} - /* Lower combined samples and sampler variables to synthesized separated textures and samplers. - * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. 
*/ - static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2672,7 +2794,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - return false; - } - -- assert(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); -+ VKD3D_ASSERT(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); - - if (!(name = hlsl_get_string_buffer(ctx))) - return false; -@@ -2689,7 +2811,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - struct hlsl_type *arr_type = load->resource.var->data_type; - for (i = 0; i < load->resource.path_len; ++i) - { -- assert(arr_type->class == HLSL_CLASS_ARRAY); -+ VKD3D_ASSERT(arr_type->class == HLSL_CLASS_ARRAY); - texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); - arr_type = arr_type->e.array.type; - } -@@ -2718,8 +2840,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - - hlsl_copy_deref(ctx, &load->sampler, &load->resource); - load->resource.var = var; -- assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); -- assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); -+ VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); -+ VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); - - return true; - } -@@ -3137,7 +3259,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); - - /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ -- assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) - return false; -@@ -3193,7 +3315,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - } - -- assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, - instr->data_type->dimx, instr->data_type->dimy); -@@ -3491,7 +3613,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - - /* Narrowing casts should have already been lowered. 
*/ -- assert(type->dimx == arg_type->dimx); -+ VKD3D_ASSERT(type->dimx == arg_type->dimx); - - zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); - if (!zero) -@@ -3513,7 +3635,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; - struct hlsl_ir_node *cond; - -- assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); - - if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) - { -@@ -3712,7 +3834,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - { - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; -- assert(arg1->data_type->dimx == arg2->data_type->dimx); -+ VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); - dimx = arg1->data_type->dimx; - is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; - -@@ -3930,6 +4052,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_INDEX: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_STRING_CONSTANT: - case HLSL_IR_SWIZZLE: - if (list_empty(&instr->uses)) - { -@@ -3987,8 +4110,8 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - if (!deref->rel_offset.node) - return false; - -- assert(deref->var); -- assert(deref->rel_offset.node->type != HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(deref->var); -+ VKD3D_ASSERT(deref->rel_offset.node->type != HLSL_IR_CONSTANT); - deref->var->indexable = true; - - return true; -@@ -4210,6 +4333,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - break; - } - case HLSL_IR_CONSTANT: -+ case HLSL_IR_STRING_CONSTANT: - break; - } - } -@@ -4311,7 +4435,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - unsigned int writemask; - uint32_t reg_idx; - -- assert(component_count <= reg_size); -+ VKD3D_ASSERT(component_count <= reg_size); - - for (reg_idx = 0;; ++reg_idx) - { -@@ -4340,7 +4464,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct - struct hlsl_reg ret = {0}; - uint32_t reg_idx; - -- assert((reg_writemask & writemask) == writemask); -+ VKD3D_ASSERT((reg_writemask & writemask) == writemask); - - for (reg_idx = 0;; ++reg_idx) - { -@@ -4450,7 +4574,7 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls - { - enum hlsl_sampler_dim dim; - -- assert(!load->sampler.var); -+ VKD3D_ASSERT(!load->sampler.var); - - dim = var->objects_usage[regset][index].sampler_dim; - if (dim != load->sampling_dim) -@@ -4732,9 +4856,9 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - -- assert(hlsl_is_numeric_type(type)); -- assert(type->dimy == 1); -- assert(constant->reg.writemask); -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); -+ VKD3D_ASSERT(type->dimy == 1); -+ VKD3D_ASSERT(constant->reg.writemask); - - for (x = 0, i = 0; x < 4; ++x) - { -@@ -4845,6 +4969,43 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) - list_move_tail(&ctx->extern_vars, &sorted); - } - -+/* In SM2, 'sincos' expects specific constants as src1 and src2 arguments. -+ * These have to be referenced directly, i.e. as 'c' not 'r'. 
*/ -+static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct register_allocator *allocator) -+{ -+ const struct hlsl_ir_node *instr; -+ struct hlsl_type *type; -+ -+ if (ctx->profile->major_version >= 3) -+ return; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED -+ || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED)) -+ { -+ type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); -+ -+ ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); -+ TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); -+ -+ ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); -+ TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); -+ -+ return; -+ } -+ } -+} -+ - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { - struct register_allocator allocator_used = {0}; -@@ -4866,7 +5027,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - unsigned int reg_idx = var->reg_reservation.reg_index; - unsigned int i; - -- assert(reg_size % 4 == 0); -+ VKD3D_ASSERT(reg_size % 4 == 0); - for (i = 0; i < reg_size / 4; ++i) - { - if (i < bind_count) -@@ -4909,6 +5070,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); - -+ allocate_sincos_const_registers(ctx, &entry_func->body, &allocator); -+ - vkd3d_free(allocator.allocations); - } - -@@ -4958,7 +5121,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - uint32_t reg; - bool builtin; - -- assert(var->semantic.name); -+ VKD3D_ASSERT(var->semantic.name); - - if (ctx->profile->major_version < 4) - { -@@ -5357,7 +5520,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - if (var->regs[regset].index < min_index) - { -- assert(regset == HLSL_REGSET_UAVS); -+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "UAV index (%u) must be higher than the maximum render target index (%u).", - var->regs[regset].index, min_index - 1); -@@ -5435,12 +5598,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - struct hlsl_ir_node *path_node = deref->path[i].node; - unsigned int idx = 0; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - return false; - - /* We should always have generated a cast to UINT. 
*/ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; -@@ -5500,11 +5663,11 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - struct hlsl_ir_node *path_node = deref->path[i].node; - unsigned int idx = 0; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type == HLSL_IR_CONSTANT) - { - /* We should always have generated a cast to UINT. */ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; -@@ -5554,8 +5717,8 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - type = hlsl_get_element_type_from_path_index(ctx, type, path_node); - } - -- assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); -- assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); -+ VKD3D_ASSERT(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); -+ VKD3D_ASSERT(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); - return index_is_constant; - } - -@@ -5570,16 +5733,17 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - if (offset_node) - { - /* We should always have generated a cast to UINT. */ -- assert(offset_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(offset_node->data_type->class == HLSL_CLASS_SCALAR - && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -- assert(offset_node->type != HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT); - return false; - } - - size = deref->var->data_type->reg_size[regset]; - if (*offset >= size) - { -- hlsl_error(ctx, &offset_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ /* FIXME: Report a more specific location for the constant deref. */ -+ hlsl_error(ctx, &deref->var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Dereference is out of bounds. 
%u/%u", *offset, size); - return false; - } -@@ -5594,8 +5758,9 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl - if (hlsl_offset_from_deref(ctx, deref, &offset)) - return offset; - -- hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", -- hlsl_node_type_to_string(deref->rel_offset.node->type)); -+ if (deref->rel_offset.node) -+ hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", -+ hlsl_node_type_to_string(deref->rel_offset.node->type)); - - return 0; - } -@@ -5606,8 +5771,8 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -- assert(deref->data_type); -- assert(hlsl_is_numeric_type(deref->data_type)); -+ VKD3D_ASSERT(deref->data_type); -+ VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); - - ret.index += offset / 4; - ret.id += offset / 4; -@@ -5795,7 +5960,7 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, - register_index = var->regs[HLSL_REGSET_NUMERIC].id; - - ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -- assert(ret); -+ VKD3D_ASSERT(ret); - /* With the exception of vertex POSITION output, none of these are - * system values. Pixel POSITION input is not equivalent to - * SV_Position; the closer equivalent is VPOS, which is not declared -@@ -6070,7 +6235,7 @@ static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return; - - cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); -- assert(cloned_loop); -+ VKD3D_ASSERT(cloned_loop); - - if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) - { -@@ -6085,6 +6250,90 @@ static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *bloc - } - } - -+static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *call, *rhs, *store; -+ struct hlsl_ir_function_decl *func; -+ unsigned int component_count; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_ir_var *lhs; -+ char *body; -+ -+ static const char template[] = -+ "typedef uint%u uintX;\n" -+ "float%u soft_f16tof32(uintX x)\n" -+ "{\n" -+ " uintX mantissa = x & 0x3ff;\n" -+ " uintX high2 = mantissa >> 8;\n" -+ " uintX high2_check = high2 ? high2 : mantissa;\n" -+ " uintX high6 = high2_check >> 4;\n" -+ " uintX high6_check = high6 ? high6 : high2_check;\n" -+ "\n" -+ " uintX high8 = high6_check >> 2;\n" -+ " uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n" -+ " uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n" -+ " shift = high8 ? shift + 2 : shift;\n" -+ " shift = high8_check ? shift + 1 : shift;\n" -+ " shift = -shift + 10;\n" -+ " shift = mantissa ? shift : 11;\n" -+ " uintX subnormal_mantissa = ((mantissa << shift) << 23) & 0x7fe000;\n" -+ " uintX subnormal_exp = -(shift << 23) + 0x38800000;\n" -+ " uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n" -+ " uintX subnormal_or_zero = mantissa ? 
subnormal_val : 0;\n" -+ "\n" -+ " uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n" -+ "\n" -+ " uintX low_3 = (x << 13) & 0x7fe000;\n" -+ " uintX normalized_val = exponent + low_3;\n" -+ " uintX inf_nan_val = low_3 + 0x7f800000;\n" -+ "\n" -+ " uintX exp_mask = 0x7c00;\n" -+ " uintX is_inf_nan = (x & exp_mask) == exp_mask;\n" -+ " uintX is_normalized = x & exp_mask;\n" -+ "\n" -+ " uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n" -+ " uintX exp_mantissa = (is_normalized ? check : subnormal_or_zero) & 0x7fffe000;\n" -+ " uintX sign_bit = (x << 16) & 0x80000000;\n" -+ "\n" -+ " return asfloat(exp_mantissa + sign_bit);\n" -+ "}\n"; -+ -+ -+ if (node->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(node); -+ -+ if (expr->op != HLSL_OP1_F16TOF32) -+ return false; -+ -+ rhs = expr->operands[0].node; -+ component_count = hlsl_type_component_count(rhs->data_type); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) -+ return false; -+ -+ if (!(func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body))) -+ return false; -+ -+ lhs = func->parameters.vars[0]; -+ -+ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ if (!(call = hlsl_new_call(ctx, func, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, call); -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, &load->node); -+ -+ return true; -+} -+ - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -6105,6 +6354,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - if (ctx->result) - return ctx->result; - -+ if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) -+ lower_ir(ctx, lower_f16tof32, body); -+ - lower_return(ctx, entry_func, body, false); - - while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); -@@ -6195,6 +6447,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - if (profile->major_version < 4) - { -+ while (lower_ir(ctx, lower_nonconstant_array_loads, body)); -+ - lower_ir(ctx, lower_ternary, body); - - lower_ir(ctx, lower_nonfloat_exprs, body); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 16015fa8a81..db4913b7c62 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -28,7 +28,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -119,7 +119,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -234,7 +234,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ 
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -260,7 +260,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -286,7 +286,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -313,7 +313,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - unsigned int k; - float i; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -339,7 +339,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -384,7 +384,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -418,7 +418,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -442,7 +442,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -487,7 +487,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -527,7 +527,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -553,7 +553,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -598,8 +598,8 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum 
hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -635,8 +635,8 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -662,8 +662,8 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -689,8 +689,8 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -715,9 +715,9 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -- assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); - - dst->u[0].f = 0.0f; - for (k = 0; k < src1->node.data_type->dimx; ++k) -@@ -743,11 +743,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -- assert(type == src3->node.data_type->e.numeric.type); -- assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -- assert(src3->node.data_type->dimx == 1); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ VKD3D_ASSERT(src3->node.data_type->dimx == 1); - - dst->u[0].f = src3->value.u[0].f; - for (k = 0; k < src1->node.data_type->dimx; ++k) -@@ -774,8 +774,8 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == 
src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -841,8 +841,8 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -877,8 +877,8 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -916,8 +916,8 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -955,8 +955,8 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -- assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -986,8 +986,8 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1024,8 +1024,8 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1063,8 +1063,8 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1105,8 +1105,8 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = 
dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1139,8 +1139,8 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1175,9 +1175,9 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - { - unsigned int k; - -- assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); -- assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); -- assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); - - for (k = 0; k < dst_type->dimx; ++k) - dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; -@@ -1190,8 +1190,8 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -- assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1239,7 +1239,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - { - if (expr->operands[i].node->type != HLSL_IR_CONSTANT) - return false; -- assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); - } - } - arg1 = hlsl_ir_constant(expr->operands[0].node); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index e0ac6322c71..c1b8582af6d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -469,7 +469,9 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - return ret; - break; - -+ case VKD3DSIH_DCL: - case VKD3DSIH_DCL_CONSTANT_BUFFER: -+ case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: - vkd3d_shader_instruction_make_nop(ins); - break; -@@ -917,7 +919,7 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param - if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) - { - /* The TPF reader validates idx_count. */ -- assert(reg->idx_count == 1); -+ VKD3D_ASSERT(reg->idx_count == 1); - reg->idx[1] = reg->idx[0]; - /* The control point id param is implicit here. Avoid later complications by inserting it. 
*/ - reg->idx[0].offset = 0; -@@ -1139,16 +1141,16 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u - { - unsigned int i, j, r, c, component_idx, component_count; - -- assert(write_mask <= VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(write_mask <= VKD3DSP_WRITEMASK_ALL); - component_idx = vsir_write_mask_get_component_idx(write_mask); - component_count = vsir_write_mask_component_count(write_mask); - -- assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); -+ VKD3D_ASSERT(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); - - if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) - { - /* Validated in the TPF reader. */ -- assert(range_map[register_idx][component_idx] != UINT8_MAX); -+ VKD3D_ASSERT(range_map[register_idx][component_idx] != UINT8_MAX); - return; - } - if (range_map[register_idx][component_idx] == register_count) -@@ -1168,7 +1170,7 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u - /* A synthetic patch constant range which overlaps an existing range can start upstream of it - * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. - * The latter is validated in the TPF reader. */ -- assert(!range_map[r][c] || !is_dcl_indexrange); -+ VKD3D_ASSERT(!range_map[r][c] || !is_dcl_indexrange); - range_map[r][c] = UINT8_MAX; - } - } -@@ -1371,7 +1373,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - - TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, - e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); -- assert(!(e->mask & f->mask)); -+ VKD3D_ASSERT(!(e->mask & f->mask)); - - e->mask |= f->mask; - e->used_mask |= f->used_mask; -@@ -1405,7 +1407,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - continue; - - register_count = range_map_get_register_count(range_map, e->register_index, e->mask); -- assert(register_count != UINT8_MAX); -+ VKD3D_ASSERT(register_count != UINT8_MAX); - register_count += !register_count; - - if (register_count > 1) -@@ -1428,7 +1430,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, - unsigned int id_idx, unsigned int register_index) - { -- assert(id_idx < ARRAY_SIZE(reg->idx) - 1); -+ VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); - - /* For a relative-addressed register index, move the id up a slot to separate it from the address, - * because rel_addr can be replaced with a constant offset in some cases. */ -@@ -1535,7 +1537,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - if (is_io_dcl) - { - /* Validated in the TPF reader. */ -- assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); -+ VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); - - if (dcl_params[element_idx]) - { -@@ -1560,7 +1562,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - else - { - /* The control point id param. 
*/ -- assert(reg->idx[0].rel_addr); -+ VKD3D_ASSERT(reg->idx[0].rel_addr); - } - id_idx = 1; - } -@@ -1799,7 +1801,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; - else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) - normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; -- else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); -+ else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); - } - } - } -@@ -2815,7 +2817,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi - - static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *param) - { -- assert(param->reg.type == VKD3DSPR_LABEL); -+ VKD3D_ASSERT(param->reg.type == VKD3DSPR_LABEL); - return param->reg.idx[0].offset; - } - -@@ -3084,7 +3086,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - unsigned int label; - - label = label_from_src_param(&ins->src[j + 1]); -- assert(label); -+ VKD3D_ASSERT(label); - - info = &block_info[label - 1]; - -@@ -3241,7 +3243,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int - - byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); - -- assert(label); -+ VKD3D_ASSERT(label); - memset(block, 0, sizeof(*block)); - block->label = label; - vsir_block_list_init(&block->predecessors); -@@ -3525,7 +3527,7 @@ static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_blo - struct vsir_block *successor = &cfg->blocks[target - 1]; - enum vkd3d_result ret; - -- assert(successor->label != 0); -+ VKD3D_ASSERT(successor->label != 0); - - if ((ret = vsir_block_list_add(&block->successors, successor)) < 0) - return ret; -@@ -3702,11 +3704,11 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - { - unsigned int label = label_from_src_param(&instruction->src[0]); - -- assert(!current_block); -- assert(label > 0); -- assert(label <= cfg->block_count); -+ VKD3D_ASSERT(!current_block); -+ VKD3D_ASSERT(label > 0); -+ VKD3D_ASSERT(label <= cfg->block_count); - current_block = &cfg->blocks[label - 1]; -- assert(current_block->label == 0); -+ VKD3D_ASSERT(current_block->label == 0); - if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) - goto fail; - current_block->begin = &program->instructions.elements[i + 1]; -@@ -3717,7 +3719,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - - case VKD3DSIH_BRANCH: - case VKD3DSIH_RET: -- assert(current_block); -+ VKD3D_ASSERT(current_block); - current_block->end = instruction; - current_block = NULL; - break; -@@ -3725,7 +3727,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- assert(!current_block); -+ VKD3D_ASSERT(!current_block); - finish = true; - break; - -@@ -3795,7 +3797,7 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru - { - size_t i; - -- assert(current->label != 0); -+ VKD3D_ASSERT(current->label != 0); - - if (current == reference) - return; -@@ -4010,7 +4012,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - /* Do not count back edges. 
*/ - if (cfg->loops_by_header[i] != SIZE_MAX) - { -- assert(in_degrees[i] > 0); -+ VKD3D_ASSERT(in_degrees[i] > 0); - in_degrees[i] -= 1; - } - -@@ -4096,7 +4098,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - - inner_stack_item->seen_count += new_seen_count; - -- assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); -+ VKD3D_ASSERT(inner_stack_item->seen_count <= inner_stack_item->loop->count); - if (inner_stack_item->seen_count != inner_stack_item->loop->count) - break; - -@@ -4116,7 +4118,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - if (vsir_block_dominates(successor, block)) - continue; - -- assert(in_degrees[successor->label - 1] > 0); -+ VKD3D_ASSERT(in_degrees[successor->label - 1] > 0); - --in_degrees[successor->label - 1]; - - if (in_degrees[successor->label - 1] == 0) -@@ -4137,7 +4139,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - goto fail; - } - -- assert(sorter.stack_count == 0); -+ VKD3D_ASSERT(sorter.stack_count == 0); - - vkd3d_free(in_degrees); - vkd3d_free(sorter.stack); -@@ -4207,7 +4209,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ - if (vsir_block_dominates(successor, block)) - continue; - -- assert(block->order_pos < successor->order_pos); -+ VKD3D_ASSERT(block->order_pos < successor->order_pos); - - /* Jumping from a block to the following one is always - * possible, so nothing to do. */ -@@ -4280,7 +4282,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ - { - if (interval->synthetic) - interval->begin = min(begin, interval->begin); -- assert(begin >= interval->begin); -+ VKD3D_ASSERT(begin >= interval->begin); - } - } - -@@ -4333,7 +4335,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block - break; - } - -- assert(action->target != UINT_MAX); -+ VKD3D_ASSERT(action->target != UINT_MAX); - action->jump_type = JUMP_CONTINUE; - } - else -@@ -4355,7 +4357,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block - - if (action->target == UINT_MAX) - { -- assert(successor->order_pos == block->order_pos + 1); -+ VKD3D_ASSERT(successor->order_pos == block->order_pos + 1); - action->jump_type = JUMP_NONE; - } - else -@@ -4382,7 +4384,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - struct vsir_block *block = cfg->order.blocks[i]; - struct vsir_cfg_structure *structure; - -- assert(stack_depth > 0); -+ VKD3D_ASSERT(stack_depth > 0); - - /* Open loop intervals. */ - while (open_interval_idx < cfg->loop_interval_count) -@@ -4441,7 +4443,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - * selection ladders. 
*/ - if (action_true.successor == action_false.successor) - { -- assert(action_true.jump_type == action_false.jump_type); -+ VKD3D_ASSERT(action_true.jump_type == action_false.jump_type); - } - else - { -@@ -4457,7 +4459,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; - struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; - -- assert(inner_loop->type == STRUCTURE_TYPE_LOOP); -+ VKD3D_ASSERT(inner_loop->type == STRUCTURE_TYPE_LOOP); - - /* Otherwise, if one of the branches is - * continueing the inner loop we're inside, -@@ -4474,7 +4476,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - action_false = tmp; - } - -- assert(action_true.jump_type != JUMP_NONE); -+ VKD3D_ASSERT(action_true.jump_type != JUMP_NONE); - - if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) - goto fail; -@@ -4514,8 +4516,8 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - } - } - -- assert(stack_depth == 0); -- assert(open_interval_idx == cfg->loop_interval_count); -+ VKD3D_ASSERT(stack_depth == 0); -+ VKD3D_ASSERT(open_interval_idx == cfg->loop_interval_count); - - if (TRACE_ON()) - vsir_cfg_dump_structured_program(cfg); -@@ -4539,7 +4541,7 @@ static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, - && !last->u.jump.condition && last->u.jump.target == target) - { - --list->count; -- assert(cfg->loop_intervals[target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); - --cfg->loop_intervals[target].target_count; - } - } -@@ -4580,7 +4582,7 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg - size_t pos = list->count - 1; - - selection = &list->structures[pos]; -- assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); - - if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); - else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); -@@ -4601,19 +4603,19 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg - /* Pointer `selection' could have been invalidated by the append - * operation. */ - selection = &list->structures[pos]; -- assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); - - if (if_target == max_target) - { - --selection->u.selection.if_body.count; -- assert(cfg->loop_intervals[if_target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[if_target].target_count > 0); - --cfg->loop_intervals[if_target].target_count; - } - - if (else_target == max_target) - { - --selection->u.selection.else_body.count; -- assert(cfg->loop_intervals[else_target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[else_target].target_count > 0); - --cfg->loop_intervals[else_target].target_count; - } - -@@ -4721,7 +4723,7 @@ static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, - } - - target = trailing_break->u.jump.target; -- assert(cfg->loop_intervals[target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); - - /* If the loop is not targeted by any jump, we can remove it. 
The - * trailing `break' then targets another loop, so we have to keep -@@ -4888,7 +4890,7 @@ static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_stru - break; - for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) - { -- assert(l->type == STRUCTURE_TYPE_LOOP); -+ VKD3D_ASSERT(l->type == STRUCTURE_TYPE_LOOP); - l->u.loop.needs_trampoline = true; - } - break; -@@ -4928,7 +4930,7 @@ static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_struct - case STRUCTURE_TYPE_JUMP: - if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) - break; -- assert(loop && loop->type == STRUCTURE_TYPE_LOOP); -+ VKD3D_ASSERT(loop && loop->type == STRUCTURE_TYPE_LOOP); - if (loop->u.loop.needs_trampoline) - structure->u.jump.needs_launcher = true; - break; -@@ -5126,7 +5128,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, - break; - - case JUMP_RET: -- assert(!jump->condition); -+ VKD3D_ASSERT(!jump->condition); - opcode = VKD3DSIH_RET; - break; - -@@ -5266,18 +5268,18 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - switch (ins->opcode) - { - case VKD3DSIH_LABEL: -- assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); - TRACE("Structurizing a non-hull shader.\n"); - if ((ret = vsir_program_structurize_function(program, message_context, - &target, &i)) < 0) - goto fail; -- assert(i == program->instructions.count); -+ VKD3D_ASSERT(i == program->instructions.count); - break; - - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); - TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); - target.instructions[target.ins_count++] = *ins; - ++i; -@@ -5439,18 +5441,18 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - switch (ins->opcode) - { - case VKD3DSIH_LABEL: -- assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); - TRACE("Materializing undominated SSAs in a non-hull shader.\n"); - if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, &i)) < 0) - return ret; -- assert(i == program->instructions.count); -+ VKD3D_ASSERT(i == program->instructions.count); - break; - - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); - TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); - ++i; - if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( -@@ -6097,8 +6099,8 @@ static const char *name_from_cf_type(enum cf_type type) - static void vsir_validate_cf_type(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) - { -- assert(ctx->cf_type != CF_TYPE_UNKNOWN); -- assert(expected_type != CF_TYPE_UNKNOWN); -+ VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); -+ VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); - if (ctx->cf_type != expected_type) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, 
"Invalid instruction %#x in %s shader.", - instruction->opcode, name_from_cf_type(ctx->cf_type)); -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h -index 4860cf5f90e..9806614a35b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.h -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h -@@ -141,7 +141,7 @@ void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location - - static inline struct preproc_file *preproc_get_top_file(struct preproc_ctx *ctx) - { -- assert(ctx->file_count); -+ VKD3D_ASSERT(ctx->file_count); - return &ctx->file_stack[ctx->file_count - 1]; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index a3cdbe559a7..7fc963192cf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -409,7 +409,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - } - ctx->last_was_eof = false; - -- assert(ctx->file_count); -+ VKD3D_ASSERT(ctx->file_count); - if (!(token = preproc_lexer_lex(lval, lloc, scanner))) - { - ctx->last_was_eof = true; -@@ -647,7 +647,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - struct preproc_text *current_arg = NULL; - -- assert(func_state->macro->arg_count); -+ VKD3D_ASSERT(func_state->macro->arg_count); - - if (func_state->arg_count < func_state->macro->arg_count) - current_arg = &func_state->macro->arg_values[func_state->arg_count]; -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y -index 009c35ffb97..366e351e3b5 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.y -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y -@@ -119,7 +119,7 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati - macro->body.text = *body; - macro->body.location = *body_loc; - ret = rb_put(&ctx->macros, name, ¯o->entry); -- assert(!ret); -+ VKD3D_ASSERT(!ret); - return true; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index d66446be0b0..bc8a7a5b28c 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -313,7 +313,7 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, - struct vkd3d_spirv_chunk *chunk; - size_t src_location = 0; - -- assert(list_empty(&dst_stream->inserted_chunks)); -+ VKD3D_ASSERT(list_empty(&dst_stream->inserted_chunks)); - - LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) - src_word_count += chunk->word_count; -@@ -322,16 +322,16 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, - dst_stream->word_count + src_word_count, sizeof(*dst_stream->words))) - return false; - -- assert(dst_stream->word_count + src_word_count <= dst_stream->capacity); -+ VKD3D_ASSERT(dst_stream->word_count + src_word_count <= dst_stream->capacity); - LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) - { -- assert(src_location <= chunk->location); -+ VKD3D_ASSERT(src_location <= chunk->location); - word_count = chunk->location - src_location; - memcpy(&dst_stream->words[dst_stream->word_count], &src_stream->words[src_location], - word_count * sizeof(*src_stream->words)); - dst_stream->word_count += word_count; - src_location += word_count; -- assert(src_location == chunk->location); -+ VKD3D_ASSERT(src_location == chunk->location); - - memcpy(&dst_stream->words[dst_stream->word_count], chunk->words, - 
chunk->word_count * sizeof(*chunk->words)); -@@ -464,7 +464,7 @@ static void vkd3d_spirv_set_execution_model(struct vkd3d_spirv_builder *builder, - - static uint32_t vkd3d_spirv_opcode_word(SpvOp op, unsigned int word_count) - { -- assert(!(op & ~SpvOpCodeMask)); -+ VKD3D_ASSERT(!(op & ~SpvOpCodeMask)); - return (word_count << SpvWordCountShift) | op; - } - -@@ -538,7 +538,7 @@ static int vkd3d_spirv_declaration_compare(const void *key, const struct rb_entr - return ret; - if ((ret = vkd3d_u32_compare(a->parameter_count, b->parameter_count))) - return ret; -- assert(a->parameter_count <= ARRAY_SIZE(a->parameters)); -+ VKD3D_ASSERT(a->parameter_count <= ARRAY_SIZE(a->parameters)); - return memcmp(&a->parameters, &b->parameters, a->parameter_count * sizeof(*a->parameters)); - } - -@@ -554,7 +554,7 @@ static void vkd3d_spirv_insert_declaration(struct vkd3d_spirv_builder *builder, - { - struct vkd3d_spirv_declaration *d; - -- assert(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); -+ VKD3D_ASSERT(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); - - if (!(d = vkd3d_malloc(sizeof(*d)))) - return; -@@ -823,7 +823,7 @@ static uint32_t vkd3d_spirv_build_op_tr2v(struct vkd3d_spirv_builder *builder, - static void vkd3d_spirv_begin_function_stream_insertion(struct vkd3d_spirv_builder *builder, - size_t location) - { -- assert(builder->insertion_location == ~(size_t)0); -+ VKD3D_ASSERT(builder->insertion_location == ~(size_t)0); - - if (vkd3d_spirv_stream_current_location(&builder->function_stream) == location) - return; -@@ -1166,7 +1166,7 @@ static uint32_t vkd3d_spirv_get_op_constant(struct vkd3d_spirv_builder *builder, - static uint32_t vkd3d_spirv_build_op_constant64(struct vkd3d_spirv_builder *builder, - uint32_t result_type, const uint32_t *values, unsigned int value_count) - { -- assert(value_count == 2); -+ VKD3D_ASSERT(value_count == 2); - return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, - SpvOpConstant, result_type, values, value_count); - } -@@ -1583,13 +1583,13 @@ static uint32_t vkd3d_spirv_build_image_instruction(struct vkd3d_spirv_builder * - unsigned int index = 0, i; - uint32_t w[10]; - -- assert(operand_count <= ARRAY_SIZE(w)); -+ VKD3D_ASSERT(operand_count <= ARRAY_SIZE(w)); - for (i = 0; i < operand_count; ++i) - w[index++] = operands[i]; - - if (image_operands_mask) - { -- assert(index + 1 + image_operand_count <= ARRAY_SIZE(w)); -+ VKD3D_ASSERT(index + 1 + image_operand_count <= ARRAY_SIZE(w)); - w[index++] = image_operands_mask; - for (i = 0; i < image_operand_count; ++i) - w[index++] = image_operands[i]; -@@ -1606,9 +1606,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample(struct vkd3d_spirv_builder *bu - const uint32_t operands[] = {sampled_image_id, coordinate_id}; - - if (op == SpvOpImageSampleExplicitLod) -- assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); -+ VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); - else -- assert(op == SpvOpImageSampleImplicitLod); -+ VKD3D_ASSERT(op == SpvOpImageSampleImplicitLod); - - return vkd3d_spirv_build_image_instruction(builder, op, result_type, - operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); -@@ -1621,9 +1621,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample_dref(struct vkd3d_spirv_builde - const uint32_t operands[] = {sampled_image_id, coordinate_id, dref_id}; - - if (op == SpvOpImageSampleDrefExplicitLod) -- assert(image_operands_mask & 
(SpvImageOperandsLodMask | SpvImageOperandsGradMask)); -+ VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); - else -- assert(op == SpvOpImageSampleDrefImplicitLod); -+ VKD3D_ASSERT(op == SpvOpImageSampleDrefImplicitLod); - - return vkd3d_spirv_build_image_instruction(builder, op, result_type, - operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); -@@ -1900,7 +1900,7 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - } - else - { -- assert(component_type != VKD3D_SHADER_COMPONENT_VOID); -+ VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); - scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); - } -@@ -2266,7 +2266,7 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, - case VKD3DSPR_OUTPUT: - case VKD3DSPR_PATCHCONST: - symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; -- assert(!reg->idx_count || symbol->key.reg.idx != ~0u); -+ VKD3D_ASSERT(!reg->idx_count || symbol->key.reg.idx != ~0u); - break; - - case VKD3DSPR_IMMCONSTBUFFER: -@@ -2905,7 +2905,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind - - if (is_uav_counter) - { -- assert(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); -+ VKD3D_ASSERT(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); - binding_offsets = compiler->offset_info.uav_counter_offsets; - for (i = 0; i < shader_interface->uav_counter_count; ++i) - { -@@ -3023,7 +3023,7 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int i; - -- assert(0 < component_count && component_count <= VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_VEC4_SIZE); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - - switch (component_type) -@@ -3064,7 +3064,7 @@ static uint32_t spirv_compiler_get_constant64(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int i; - -- assert(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); -+ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - - if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE && component_type != VKD3D_SHADER_COMPONENT_UINT64) -@@ -3442,7 +3442,7 @@ static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *comp - uint32_t type_id, result_id; - unsigned int i; - -- assert(val_component_idx < val_component_count); -+ VKD3D_ASSERT(val_component_idx < val_component_count); - - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - if (val_component_count == 1) -@@ -3503,11 +3503,11 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, - struct vkd3d_symbol reg_symbol, *symbol; - struct rb_entry *entry; - -- assert(!register_is_constant_or_undef(reg)); -+ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_TEMP) - { -- assert(reg->idx[0].offset < compiler->temp_count); -+ VKD3D_ASSERT(reg->idx[0].offset < compiler->temp_count); - register_info->id = compiler->temp_id + reg->idx[0].offset; - register_info->storage_class = SpvStorageClassPrivate; - register_info->descriptor_array = NULL; -@@ -3638,7 +3638,7 @@ 
static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp - - if (reg->type == VKD3DSPR_CONSTBUFFER) - { -- assert(!reg->idx[0].rel_addr); -+ VKD3D_ASSERT(!reg->idx[0].rel_addr); - if (register_info->descriptor_array) - indexes[index_count++] = spirv_compiler_get_descriptor_index(compiler, reg, - register_info->descriptor_array, register_info->binding_base_idx, VKD3D_SHADER_RESOURCE_BUFFER); -@@ -3756,7 +3756,7 @@ static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, - { - if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - { -- assert(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); -+ VKD3D_ASSERT(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); - components[component_idx++] = val_id; - } - } -@@ -3781,7 +3781,7 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil - uint32_t type_id; - unsigned int i; - -- assert(component_count <= ARRAY_SIZE(components)); -+ VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); - - for (i = 0; i < component_count; ++i) - { -@@ -3804,7 +3804,7 @@ static uint32_t spirv_compiler_emit_int_to_bool(struct spirv_compiler *compiler, - uint32_t type_id; - SpvOp op; - -- assert(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); -+ VKD3D_ASSERT(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); - op = condition & VKD3D_SHADER_CONDITIONAL_OP_Z ? SpvOpIEqual : SpvOpINotEqual; -@@ -3934,7 +3934,7 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile - uint32_t values[VKD3D_VEC4_SIZE] = {0}; - unsigned int i, j; - -- assert(reg->type == VKD3DSPR_IMMCONST); -+ VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST); - - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { -@@ -3962,7 +3962,7 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi - uint64_t values[VKD3D_DVEC2_SIZE] = {0}; - unsigned int i, j; - -- assert(reg->type == VKD3DSPR_IMMCONST64); -+ VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST64); - - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { -@@ -3989,7 +3989,7 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id; - -- assert(reg->type == VKD3DSPR_UNDEF); -+ VKD3D_ASSERT(reg->type == VKD3DSPR_UNDEF); - - type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); - return vkd3d_spirv_get_op_undef(builder, type_id); -@@ -4005,8 +4005,8 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - uint32_t skipped_component_mask; - -- assert(!register_is_constant_or_undef(reg)); -- assert(vsir_write_mask_component_count(write_mask) == 1); -+ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); -+ VKD3D_ASSERT(vsir_write_mask_component_count(write_mask) == 1); - - component_idx = vsir_write_mask_get_component_idx(write_mask); - component_idx = vsir_swizzle_get_component(swizzle, component_idx); -@@ -4129,8 +4129,8 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil - static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg) - { -- assert(reg->idx[0].offset < 
compiler->ssa_register_count); -- assert(reg->idx_count == 1); -+ VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); -+ VKD3D_ASSERT(reg->idx_count == 1); - return &compiler->ssa_register_info[reg->idx[0].offset]; - } - -@@ -4138,7 +4138,7 @@ static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *co - const struct vkd3d_shader_register *reg, uint32_t val_id) - { - unsigned int i = reg->idx[0].offset; -- assert(i < compiler->ssa_register_count); -+ VKD3D_ASSERT(i < compiler->ssa_register_count); - compiler->ssa_register_info[i].data_type = reg->data_type; - compiler->ssa_register_info[i].id = val_id; - } -@@ -4158,10 +4158,10 @@ static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler - if (!val_id) - { - /* Should only be from a missing instruction implementation. */ -- assert(compiler->failed); -+ VKD3D_ASSERT(compiler->failed); - return 0; - } -- assert(vkd3d_swizzle_is_scalar(swizzle, reg)); -+ VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); - - reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); - -@@ -4383,7 +4383,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, - unsigned int i, src_idx, dst_idx; - uint32_t type_id, dst_val_id; - -- assert(write_mask); -+ VKD3D_ASSERT(write_mask); - - component_count = vsir_write_mask_component_count(write_mask); - dst_component_count = vsir_write_mask_component_count(dst_write_mask); -@@ -4408,7 +4408,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, - type_id = vkd3d_spirv_get_type_id(builder, component_type, dst_component_count); - dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone); - -- assert(component_count <= ARRAY_SIZE(components)); -+ VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); - - for (i = 0, src_idx = 0, dst_idx = 0; dst_idx < VKD3D_VEC4_SIZE; ++dst_idx) - { -@@ -4437,7 +4437,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - uint32_t src_write_mask = write_mask; - uint32_t type_id; - -- assert(!register_is_constant_or_undef(reg)); -+ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_SSA) - { -@@ -4496,7 +4496,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, - static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, uint32_t val_id) - { -- assert(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); -+ VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); - if (dst->modifiers & VKD3DSPDM_SATURATE) - val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); - -@@ -4928,7 +4928,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler - { - struct vkd3d_shader_register r; - -- assert(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); - - vsir_register_init(&r, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); - return spirv_compiler_get_register_id(compiler, &r); -@@ -5048,7 +5048,7 @@ static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *co - unsigned int sizes[2]; - uint32_t id; - -- assert(size_count <= ARRAY_SIZE(sizes)); -+ VKD3D_ASSERT(size_count <= ARRAY_SIZE(sizes)); - memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); - array_sizes = sizes; - sizes[0] = max(sizes[0], builtin->spirv_array_size); -@@ -5210,7 +5210,7 @@ static uint32_t spirv_compiler_emit_input(struct 
spirv_compiler *compiler, - use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); - reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; -- assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); -+ VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); - spirv_compiler_put_symbol(compiler, ®_symbol); - - vkd3d_spirv_build_op_name(builder, var_id, reg_type == VKD3DSPR_PATCHCONST ? "vpc%u" : "v%u", element_idx); -@@ -5256,8 +5256,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, - uint32_t write_mask; - uint32_t input_id; - -- assert(!reg->idx_count || !reg->idx[0].rel_addr); -- assert(reg->idx_count < 2); -+ VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); -+ VKD3D_ASSERT(reg->idx_count < 2); - - if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { -@@ -5391,8 +5391,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - uint32_t write_mask; - uint32_t output_id; - -- assert(!reg->idx_count || !reg->idx[0].rel_addr); -- assert(reg->idx_count < 2); -+ VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); -+ VKD3D_ASSERT(reg->idx_count < 2); - - if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { -@@ -5578,7 +5578,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_variable ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); - reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; -- assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); -+ VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); - - spirv_compiler_put_symbol(compiler, ®_symbol); - -@@ -5916,7 +5916,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t - function_location = spirv_compiler_get_current_function_location(compiler); - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - -- assert(!compiler->temp_count); -+ VKD3D_ASSERT(!compiler->temp_count); - compiler->temp_count = count; - for (i = 0; i < compiler->temp_count; ++i) - { -@@ -5924,7 +5924,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t - SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (!i) - compiler->temp_id = id; -- assert(id == compiler->temp_id + i); -+ VKD3D_ASSERT(id == compiler->temp_id + i); - - vkd3d_spirv_build_op_name(builder, id, "r%u", i); - } -@@ -5934,7 +5934,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t - - static void spirv_compiler_allocate_ssa_register_ids(struct spirv_compiler *compiler, unsigned int count) - { -- assert(!compiler->ssa_register_info); -+ VKD3D_ASSERT(!compiler->ssa_register_info); - if (!(compiler->ssa_register_info = vkd3d_calloc(count, sizeof(*compiler->ssa_register_info)))) - { - ERR("Failed to allocate SSA register value id array, count %u.\n", count); -@@ -6036,7 +6036,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com - vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 4); - descriptor_offsets_member_idx = j; - compiler->descriptor_offsets_member_id = spirv_compiler_get_constant_uint(compiler, 
j); -- assert(j == count - 1); -+ VKD3D_ASSERT(j == count - 1); - } - - struct_id = vkd3d_spirv_build_op_type_struct(builder, member_ids, count); -@@ -6523,7 +6523,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - - if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) - { -- assert(structure_stride); /* counters are valid only for structured buffers */ -+ VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ - - counter_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - if (spirv_compiler_is_opengl_target(compiler)) -@@ -6866,7 +6866,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - uint32_t function_id, void_id, function_type_id; - struct vkd3d_shader_phase *phase; - -- assert(compiler->phase != instruction->opcode); -+ VKD3D_ASSERT(compiler->phase != instruction->opcode); - - if (!is_in_default_phase(compiler)) - spirv_compiler_leave_shader_phase(compiler); -@@ -6943,7 +6943,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile - input_reg.idx[1].offset = 0; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - -- assert(input_signature->element_count == output_signature->element_count); -+ VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *output = &output_signature->elements[i]; -@@ -6951,8 +6951,8 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile - struct vkd3d_shader_register_info output_reg_info; - struct vkd3d_shader_register output_reg; - -- assert(input->mask == output->mask); -- assert(input->component_type == output->component_type); -+ VKD3D_ASSERT(input->mask == output->mask); -+ VKD3D_ASSERT(input->component_type == output->component_type); - - input_reg.idx[1].offset = i; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); -@@ -7120,7 +7120,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t val_id; - -- assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); -+ VKD3D_ASSERT(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); - - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) -@@ -7199,8 +7199,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - return VKD3D_ERROR_INVALID_SHADER; - } - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); - - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - -@@ -7330,8 +7330,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - - instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); - - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - -@@ -7472,7 +7472,7 @@ static void spirv_compiler_emit_swapc(struct spirv_compiler *compiler, - uint32_t condition_id, 
src1_id, src2_id, type_id, val_id; - unsigned int component_count; - -- assert(dst[0].write_mask == dst[1].write_mask); -+ VKD3D_ASSERT(dst[0].write_mask == dst[1].write_mask); - - condition_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); - src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); -@@ -7511,7 +7511,7 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, - else - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; - -- assert(instruction->src_count == ARRAY_SIZE(src_ids)); -+ VKD3D_ASSERT(instruction->src_count == ARRAY_SIZE(src_ids)); - for (i = 0; i < ARRAY_SIZE(src_ids); ++i) - src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], write_mask); - -@@ -7703,8 +7703,8 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - unsigned int component_count; - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count == 1); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count == 1); - - /* OpConvertFToI has undefined results if the result cannot be represented - * as a signed integer, but Direct3D expects the result to saturate, -@@ -7756,8 +7756,8 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, - uint32_t src_type_id, dst_type_id, condition_type_id; - unsigned int component_count; - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count == 1); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count == 1); - - /* OpConvertFToU has undefined results if the result cannot be represented - * as an unsigned integer, but Direct3D expects the result to saturate, -@@ -7805,7 +7805,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp - SpvOp op; - - src_count = instruction->src_count; -- assert(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); -+ VKD3D_ASSERT(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); - - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); -@@ -7823,7 +7823,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp - return; - } - -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, k = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -7867,7 +7867,7 @@ static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, - scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); - - /* FIXME: Consider a single UnpackHalf2x16 instruction per 2 components. */ -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -7901,7 +7901,7 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, - zero_id = spirv_compiler_get_constant_float(compiler, 0.0f); - - /* FIXME: Consider a single PackHalf2x16 instruction per 2 components. 
*/ -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8331,8 +8331,8 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - if (info->needs_derivative_control) - vkd3d_spirv_enable_capability(builder, SpvCapabilityDerivativeControl); - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count == 1); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count == 1); - - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -@@ -8366,7 +8366,7 @@ static const struct vkd3d_symbol *spirv_compiler_find_resource(struct spirv_comp - - vkd3d_symbol_make_resource(&resource_key, resource_reg); - entry = rb_get(&compiler->symbol_table, &resource_key); -- assert(entry); -+ VKD3D_ASSERT(entry); - return RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - } - -@@ -8475,8 +8475,8 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, - { - struct vkd3d_shader_register_info register_info; - -- assert(image->image_id); -- assert(sampler_reg); -+ VKD3D_ASSERT(image->image_id); -+ VKD3D_ASSERT(sampler_reg); - - if (!spirv_compiler_get_register_info(compiler, sampler_reg, ®ister_info)) - ERR("Failed to get sampler register info.\n"); -@@ -8559,7 +8559,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, - &src[2], VKD3DSP_WRITEMASK_0); - } -- assert(image_operand_count <= ARRAY_SIZE(image_operands)); -+ VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); - val_id = vkd3d_spirv_build_op_image_fetch(builder, type_id, - image.image_id, coordinate_id, operands_mask, image_operands, image_operand_count); - -@@ -8653,7 +8653,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - - sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); - coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); -- assert(image_operand_count <= ARRAY_SIZE(image_operands)); -+ VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); - val_id = vkd3d_spirv_build_op_image_sample(builder, op, sampled_type_id, - image.sampled_image_id, coordinate_id, operands_mask, image_operands, image_operand_count); - -@@ -8838,7 +8838,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler - type_id, resource_symbol->info.resource.structure_stride, - &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8870,7 +8870,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler - type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - - texel_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8913,7 +8913,7 @@ static void 
spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8976,7 +8976,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * - &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - - data = &src[instruction->src_count - 1]; -- assert(data->reg.data_type == VKD3D_DATA_UINT); -+ VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -9004,7 +9004,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * - type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - - data = &src[instruction->src_count - 1]; -- assert(data->reg.data_type == VKD3D_DATA_UINT); -+ VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -9185,7 +9185,7 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c - - resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); - counter_id = resource_symbol->info.resource.uav_counter_id; -- assert(counter_id); -+ VKD3D_ASSERT(counter_id); - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - -@@ -9350,14 +9350,14 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - if (structure_stride || raw) - { -- assert(!raw != !structure_stride); -+ VKD3D_ASSERT(!raw != !structure_stride); - coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, structure_stride, &src[0], VKD3DSP_WRITEMASK_0, - &src[0], VKD3DSP_WRITEMASK_1); - } - else - { -- assert(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); -+ VKD3D_ASSERT(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); - coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], coordinate_mask); - } - -@@ -9725,7 +9725,7 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, - } - else - { -- assert(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); -+ VKD3D_ASSERT(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); - op = GLSLstd450InterpolateAtSample; - src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); - } -@@ -10473,7 +10473,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_WAVE_READ_LANE_FIRST: - spirv_compiler_emit_wave_read_lane_first(compiler, instruction); - break; -- case VKD3DSIH_DCL: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - case VKD3DSIH_DCL_INPUT_SGV: -@@ -10483,7 +10482,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -- case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_UAV_RAW: - case VKD3DSIH_DCL_UAV_STRUCTURED: - case 
VKD3DSIH_DCL_UAV_TYPED: -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index 3a9a402e8e2..d6d5bbc1c07 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -1716,7 +1716,7 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( - const struct vkd3d_sm4_register_type_info *register_type_info = - get_info_from_vkd3d_register_type(lookup, vkd3d_type); - -- assert(register_type_info); -+ VKD3D_ASSERT(register_type_info); - return register_type_info->default_src_swizzle_type; - } - -@@ -2887,7 +2887,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - continue; - - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- assert(ret); -+ VKD3D_ASSERT(ret); - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; -@@ -2898,7 +2898,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - } - else - { -- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - -@@ -2975,7 +2975,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - switch (type->class) - { - case HLSL_CLASS_MATRIX: -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else -@@ -3002,6 +3002,10 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: - case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: - break; - } - vkd3d_unreachable(); -@@ -3082,7 +3086,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - else - { -- assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); -+ VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, 0)); -@@ -3665,9 +3669,9 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod - switch (imod->type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: -- assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); -- assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); -- assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); -+ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); -+ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); -+ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); - word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; -@@ -3706,7 +3710,7 @@ struct sm4_instruction - static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, - const struct hlsl_ir_node *instr) - { -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - reg->type = VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = instr->reg.id; -@@ -3725,7 +3729,7 @@ static void 
sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s - reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - reg->dimension = VSIR_DIMENSION_VEC4; - -- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - - if (!var->indexable) - { -@@ -3744,13 +3748,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s - struct vkd3d_shader_src_param *idx_src; - unsigned int idx_writemask; - -- assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); -+ VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); - idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; - memset(idx_src, 0, sizeof(*idx_src)); - - reg->idx[1].rel_addr = idx_src; - sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); -- assert(idx_writemask != 0); -+ VKD3D_ASSERT(idx_writemask != 0); - idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); - } - } -@@ -3786,7 +3790,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } -- assert(regset == HLSL_REGSET_TEXTURES); -+ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) -@@ -3805,7 +3809,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } -- assert(regset == HLSL_REGSET_UAVS); -+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) -@@ -3824,14 +3828,14 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - reg->idx_count = 1; - } -- assert(regset == HLSL_REGSET_SAMPLERS); -+ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - -- assert(data_type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; - if (hlsl_version_ge(ctx, 5, 1)) -@@ -3871,7 +3875,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - -- assert(hlsl_reg.allocated); -+ VKD3D_ASSERT(hlsl_reg.allocated); - reg->type = VKD3DSPR_INPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; -@@ -3903,7 +3907,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - -- assert(hlsl_reg.allocated); -+ VKD3D_ASSERT(hlsl_reg.allocated); - reg->type = VKD3DSPR_OUTPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; -@@ -4039,7 +4043,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v - switch (sm4_swizzle_type) - { - case VKD3D_SM4_SWIZZLE_NONE: -- assert(sm4_swizzle || register_is_constant(reg)); -+ VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg)); - token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; - break; - -@@ -4071,16 +4075,16 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, 
const struct - const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; - uint32_t idx_src_token; - -- assert(idx_src); -- assert(!idx_src->modifiers); -- assert(idx_src->reg.type != VKD3DSPR_IMMCONST); -+ VKD3D_ASSERT(idx_src); -+ VKD3D_ASSERT(!idx_src->modifiers); -+ VKD3D_ASSERT(idx_src->reg.type != VKD3DSPR_IMMCONST); - idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle); - - put_u32(buffer, idx_src_token); - for (k = 0; k < idx_src->reg.idx_count; ++k) - { - put_u32(buffer, idx_src->reg.idx[k].offset); -- assert(!idx_src->reg.idx[k].rel_addr); -+ VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); - } - } - else -@@ -4280,7 +4284,7 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) - instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; - -- assert(resource->regset == HLSL_REGSET_SAMPLERS); -+ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); - - for (i = 0; i < resource->bind_count; ++i) - { -@@ -4289,7 +4293,7 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex - - if (hlsl_version_ge(tpf->ctx, 5, 1)) - { -- assert(!i); -+ VKD3D_ASSERT(!i); - instr.dsts[0].reg.idx[0].offset = resource->id; - instr.dsts[0].reg.idx[1].offset = resource->index; - instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -@@ -4315,7 +4319,7 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - struct sm4_instruction instr; - unsigned int i; - -- assert(resource->regset == regset); -+ VKD3D_ASSERT(resource->regset == regset); - - component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); - -@@ -4337,7 +4341,7 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - - if (hlsl_version_ge(tpf->ctx, 5, 1)) - { -- assert(!i); -+ VKD3D_ASSERT(!i); - instr.dsts[0].reg.idx[0].offset = resource->id; - instr.dsts[0].reg.idx[1].offset = resource->index; - instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -@@ -4589,7 +4593,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - -- assert(dst_idx < ARRAY_SIZE(instr.dsts)); -+ VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -@@ -4648,7 +4652,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - -- assert(dst_idx < ARRAY_SIZE(instr.dsts)); -+ VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -@@ -4846,7 +4850,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - -- assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -+ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -@@ -4875,7 +4879,7 @@ static void 
write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir - return; - } - -- assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -+ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_RESINFO; -@@ -4929,7 +4933,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - const struct hlsl_type *src_type = arg1->data_type; - - /* Narrowing casts were already lowered. */ -- assert(src_type->dimx == dst_type->dimx); -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); - - switch (dst_type->e.numeric.type) - { -@@ -5071,7 +5075,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - const struct hlsl_type *dst_type = expr->node.data_type; - struct vkd3d_string_buffer *dst_type_string; - -- assert(expr->node.reg.allocated); -+ VKD3D_ASSERT(expr->node.reg.allocated); - - if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) - return; -@@ -5099,7 +5103,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_BIT_NOT: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - -@@ -5108,67 +5112,73 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_CEIL: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_COS: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_DSX: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_COARSE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_FINE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_COARSE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_FINE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_EXP2: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - -+ case HLSL_OP1_F16TOF32: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); -+ break; -+ - case HLSL_OP1_FLOOR: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - 
write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - -@@ -5204,7 +5214,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - struct sm4_instruction instr; - struct hlsl_constant_value one; - -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_DIV; -@@ -5232,34 +5242,34 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_ROUND: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV - | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_TRUNC: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); - break; - -@@ -5281,17 +5291,17 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_BIT_AND: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - -@@ -5344,7 +5354,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5370,7 +5380,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == 
HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5399,7 +5409,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5425,18 +5435,18 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - - case HLSL_OP2_LOGIC_AND: -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: -- assert(type_is_integer(dst_type)); -- assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - -@@ -5515,7 +5525,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5538,8 +5548,8 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - - case HLSL_OP2_RSHIFT: -- assert(type_is_integer(dst_type)); -- assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; -@@ -5563,7 +5573,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * - .src_count = 1, - }; - -- assert(iff->condition.node->data_type->dimx == 1); -+ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(tpf, &instr); -@@ -5641,7 +5651,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - sm4_dst_from_node(&instr.dsts[0], &load->node); - instr.dst_count = 1; - -- assert(hlsl_is_numeric_type(type)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) - { - struct hlsl_constant_value value; -@@ -5758,7 +5768,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_GRAD: - /* Combined sample expressions were lowered. 
*/ -- assert(load->sampler.var); -+ VKD3D_ASSERT(load->sampler.var); - write_sm4_sample(tpf, load); - break; - -@@ -5911,7 +5921,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - - if (!instr->reg.allocated) - { -- assert(instr->type == HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); - continue; - } - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index fdbde019111..3c1ffcdbee3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -62,7 +62,7 @@ void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer) - buffer->buffer_size = 16; - buffer->content_size = 0; - buffer->buffer = vkd3d_malloc(buffer->buffer_size); -- assert(buffer->buffer); -+ VKD3D_ASSERT(buffer->buffer); - memset(buffer->buffer, 0, buffer->buffer_size); - } - -@@ -230,7 +230,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct - { - if (!buffer) - return; -- assert(cache->count + 1 <= cache->max_count); -+ VKD3D_ASSERT(cache->count + 1 <= cache->max_count); - cache->buffers[cache->count++] = buffer; - } - -@@ -431,7 +431,7 @@ static void bytecode_set_bytes(struct vkd3d_bytecode_buffer *buffer, size_t offs - if (buffer->status) - return; - -- assert(vkd3d_bound_range(offset, size, buffer->size)); -+ VKD3D_ASSERT(vkd3d_bound_range(offset, size, buffer->size)); - memcpy(buffer->data + offset, value, size); - } - -@@ -644,7 +644,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig - signature->element_count = src->element_count; - if (!src->elements) - { -- assert(!signature->element_count); -+ VKD3D_ASSERT(!signature->element_count); - signature->elements = NULL; - return true; - } -@@ -789,7 +789,7 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_push_cf_info(struct vkd3d_ - - static void vkd3d_shader_scan_pop_cf_info(struct vkd3d_shader_scan_context *context) - { -- assert(context->cf_info_count); -+ VKD3D_ASSERT(context->cf_info_count); - - --context->cf_info_count; - } -@@ -2067,7 +2067,7 @@ bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *ins - bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, - unsigned int idx, unsigned int count) - { -- assert(idx <= instructions->count); -+ VKD3D_ASSERT(idx <= instructions->count); - - if (!shader_instruction_array_reserve(instructions, instructions->count + count)) - return false; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 7aff22e3420..13b4dab76d1 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -1311,14 +1311,14 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, - static inline struct vkd3d_shader_src_param *shader_src_param_allocator_get( - struct vkd3d_shader_param_allocator *allocator, unsigned int count) - { -- assert(allocator->stride == sizeof(struct vkd3d_shader_src_param)); -+ VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_src_param)); - return shader_param_allocator_get(allocator, count); - } - - static inline struct vkd3d_shader_dst_param *shader_dst_param_allocator_get( - struct vkd3d_shader_param_allocator *allocator, unsigned int count) - { -- assert(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); -+ 
VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); - return shader_param_allocator_get(allocator, count); - } - -@@ -1678,7 +1678,7 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask - { - unsigned int i; - -- assert(write_mask); -+ VKD3D_ASSERT(write_mask); - for (i = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) -@@ -1692,13 +1692,13 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask - static inline unsigned int vsir_write_mask_component_count(uint32_t write_mask) - { - unsigned int count = vkd3d_popcount(write_mask & VKD3DSP_WRITEMASK_ALL); -- assert(1 <= count && count <= VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(1 <= count && count <= VKD3D_VEC4_SIZE); - return count; - } - - static inline unsigned int vkd3d_write_mask_from_component_count(unsigned int component_count) - { -- assert(component_count <= VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(component_count <= VKD3D_VEC4_SIZE); - return (VKD3DSP_WRITEMASK_0 << component_count) - 1; - } - -diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c -index a0a29ed30cb..11d87ac1d98 100644 ---- a/libs/vkd3d/libs/vkd3d/cache.c -+++ b/libs/vkd3d/libs/vkd3d/cache.c -@@ -69,7 +69,14 @@ static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry - static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, - struct shader_cache_entry *e) - { -- rb_put(&cache->tree, &e->h.hash, &e->entry); -+ const struct shader_cache_key k = -+ { -+ .hash = e->h.hash, -+ .key_size = e->h.key_size, -+ .key = e->payload -+ }; -+ -+ rb_put(&cache->tree, &k, &e->entry); - } - - int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index a484da94092..dcc7690876f 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -93,7 +93,7 @@ VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue) - - vkd3d_mutex_lock(&queue->mutex); - -- assert(queue->vk_queue); -+ VKD3D_ASSERT(queue->vk_queue); - return queue->vk_queue; - } - -@@ -423,7 +423,7 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, - static const struct d3d12_root_parameter *root_signature_get_parameter( - const struct d3d12_root_signature *root_signature, unsigned int index) - { -- assert(index < root_signature->parameter_count); -+ VKD3D_ASSERT(index < root_signature->parameter_count); - return &root_signature->parameters[index]; - } - -@@ -431,7 +431,7 @@ static const struct d3d12_root_descriptor_table *root_signature_get_descriptor_t - const struct d3d12_root_signature *root_signature, unsigned int index) - { - const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); -- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); -+ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); - return &p->u.descriptor_table; - } - -@@ -439,7 +439,7 @@ static const struct d3d12_root_constant *root_signature_get_32bit_constants( - const struct d3d12_root_signature *root_signature, unsigned int index) - { - const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); -- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); -+ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); - return &p->u.constant; - } - -@@ -447,7 +447,7 @@ static const struct d3d12_root_parameter 
*root_signature_get_root_descriptor( - const struct d3d12_root_signature *root_signature, unsigned int index) - { - const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); -- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV -+ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV - || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV - || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV); - return p; -@@ -528,7 +528,7 @@ static void d3d12_fence_garbage_collect_vk_semaphores_locked(struct d3d12_fence - - if (current->u.binary.vk_fence) - WARN("Destroying potentially pending semaphore.\n"); -- assert(!current->u.binary.is_acquired); -+ VKD3D_ASSERT(!current->u.binary.is_acquired); - - VK_CALL(vkDestroySemaphore(device->vk_device, current->u.binary.vk_semaphore, NULL)); - fence->semaphores[i] = fence->semaphores[--fence->semaphore_count]; -@@ -599,7 +599,7 @@ static void d3d12_fence_remove_vk_semaphore(struct d3d12_fence *fence, struct vk - { - vkd3d_mutex_lock(&fence->mutex); - -- assert(semaphore->u.binary.is_acquired); -+ VKD3D_ASSERT(semaphore->u.binary.is_acquired); - - *semaphore = fence->semaphores[--fence->semaphore_count]; - -@@ -610,7 +610,7 @@ static void d3d12_fence_release_vk_semaphore(struct d3d12_fence *fence, struct v - { - vkd3d_mutex_lock(&fence->mutex); - -- assert(semaphore->u.binary.is_acquired); -+ VKD3D_ASSERT(semaphore->u.binary.is_acquired); - semaphore->u.binary.is_acquired = false; - - vkd3d_mutex_unlock(&fence->mutex); -@@ -1154,7 +1154,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) - - if (!(iface1 = (ID3D12Fence1 *)iface)) - return NULL; -- assert(iface1->lpVtbl == &d3d12_fence_vtbl); -+ VKD3D_ASSERT(iface1->lpVtbl == &d3d12_fence_vtbl); - return impl_from_ID3D12Fence1(iface1); - } - -@@ -1792,7 +1792,7 @@ static struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(I - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_command_allocator_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_allocator_vtbl); - return impl_from_ID3D12CommandAllocator(iface); - } - -@@ -1942,9 +1942,9 @@ static void d3d12_command_signature_decref(struct d3d12_command_signature *signa - } - - /* ID3D12CommandList */ --static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList5(ID3D12GraphicsCommandList5 *iface) -+static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList6(ID3D12GraphicsCommandList6 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); - } - - static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) -@@ -2168,7 +2168,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - } - - /* Handle read-only states. 
*/ -- assert(!is_write_resource_state(state)); -+ VKD3D_ASSERT(!is_write_resource_state(state)); - - if (state & D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER) - { -@@ -2242,7 +2242,7 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 - VkPipelineStageFlags src_stage_mask, dst_stage_mask; - VkImageMemoryBarrier barrier; - -- assert(d3d12_resource_is_texture(resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); - - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = NULL; -@@ -2289,12 +2289,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList6 *iface, - REFIID iid, void **object) - { - TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); - -- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) -+ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6) -+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) -@@ -2305,7 +2306,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - || IsEqualGUID(iid, &IID_ID3D12Object) - || IsEqualGUID(iid, &IID_IUnknown)) - { -- ID3D12GraphicsCommandList5_AddRef(iface); -+ ID3D12GraphicsCommandList6_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2316,9 +2317,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList5 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - unsigned int refcount = vkd3d_atomic_increment_u32(&list->refcount); - - TRACE("%p increasing refcount to %u.\n", list, refcount); -@@ -2331,9 +2332,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind - vkd3d_free(bindings->vk_uav_counter_views); - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList5 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - unsigned int refcount = vkd3d_atomic_decrement_u32(&list->refcount); - - TRACE("%p decreasing refcount to %u.\n", list, refcount); -@@ -2359,66 +2360,67 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList6 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, 
data); - - return vkd3d_get_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList6 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_set_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList6 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&list->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList5 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList6 *iface, const WCHAR *name) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); - - return name ? 
S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList5 *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList6 *iface, -+ REFIID iid, void **device) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(list->device, iid, device); - } - --static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList5 *iface) -+static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p.\n", iface); - - return list->type; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList5 *iface) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - VkResult vr; - -@@ -2462,7 +2464,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - ID3D12PipelineState *initial_pipeline_state) - { -- ID3D12GraphicsCommandList5 *iface = &list->ID3D12GraphicsCommandList5_iface; -+ ID3D12GraphicsCommandList6 *iface = &list->ID3D12GraphicsCommandList6_iface; - - memset(list->strides, 0, sizeof(list->strides)); - list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; -@@ -2498,14 +2500,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - - list->descriptor_heap_count = 0; - -- ID3D12GraphicsCommandList5_SetPipelineState(iface, initial_pipeline_state); -+ ID3D12GraphicsCommandList6_SetPipelineState(iface, initial_pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList6 *iface, - ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) - { - struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - HRESULT hr; - - TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", -@@ -2532,7 +2534,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL - return hr; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList6 *iface, - ID3D12PipelineState *pipeline_state) - { - FIXME("iface %p, pipeline_state %p stub!\n", iface, pipeline_state); -@@ -2542,7 +2544,7 @@ static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list - { - struct d3d12_graphics_pipeline_state *graphics; - -- 
assert(d3d12_pipeline_state_is_graphics(list->state)); -+ VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(list->state)); - graphics = &list->state->u.graphics; - - return graphics->dsv_format || (d3d12_pipeline_state_has_unknown_dsv_format(list->state) && list->dsv_format); -@@ -3051,7 +3053,7 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma - const struct vkd3d_shader_uav_counter_binding *uav_counter = &state->uav_counters.bindings[i]; - const VkBufferView *vk_uav_counter_views = bindings->vk_uav_counter_views; - -- assert(vk_uav_counter_views[i]); -+ VKD3D_ASSERT(vk_uav_counter_views[i]); - - vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[i].pNext = NULL; -@@ -3324,7 +3326,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list - return true; - - vk_render_pass = list->pso_render_pass; -- assert(vk_render_pass); -+ VKD3D_ASSERT(vk_render_pass); - - begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - begin_desc.pNext = NULL; -@@ -3380,11 +3382,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList6 *iface, - UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, - UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " -@@ -3404,11 +3406,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom - instance_count, start_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList6 *iface, - UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, - INT base_vertex_location, UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " -@@ -3430,10 +3432,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap - instance_count, start_vertex_location, base_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList6 *iface, - UINT x, UINT y, UINT z) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); -@@ -3449,10 +3451,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL - VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); - } - --static void STDMETHODCALLTYPE 
d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy buffer_copy; -@@ -3464,9 +3466,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics - vk_procs = &list->device->vk_procs; - - dst_resource = unsafe_impl_from_ID3D12Resource(dst); -- assert(d3d12_resource_is_buffer(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); - src_resource = unsafe_impl_from_ID3D12Resource(src); -- assert(d3d12_resource_is_buffer(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); - - d3d12_command_list_track_resource_usage(list, dst_resource); - d3d12_command_list_track_resource_usage(list, src_resource); -@@ -3667,11 +3669,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - src_format->dxgi_format, src_format->vk_format, - dst_format->dxgi_format, dst_format->vk_format); - -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -- assert(!vkd3d_format_is_compressed(dst_format)); -- assert(!vkd3d_format_is_compressed(src_format)); -- assert(dst_format->byte_count == src_format->byte_count); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(!vkd3d_format_is_compressed(dst_format)); -+ VKD3D_ASSERT(!vkd3d_format_is_compressed(src_format)); -+ VKD3D_ASSERT(dst_format->byte_count == src_format->byte_count); - - buffer_image_copy.bufferOffset = 0; - buffer_image_copy.bufferRowLength = 0; -@@ -3715,11 +3717,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - buffer_image_copy.imageSubresource.layerCount = layer_count; - dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - -- assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == -+ VKD3D_ASSERT(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == - d3d12_resource_desc_get_width(dst_desc, dst_miplevel_idx)); -- assert(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == -+ VKD3D_ASSERT(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == - d3d12_resource_desc_get_height(dst_desc, dst_miplevel_idx)); -- assert(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == -+ VKD3D_ASSERT(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == - d3d12_resource_desc_get_depth(dst_desc, dst_miplevel_idx)); - - VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer, -@@ -3734,11 +3736,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) - && box->back > box->front; - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList6 *iface, - const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, - const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *src_format, *dst_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3767,8 +3769,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX - && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT) - { -- assert(d3d12_resource_is_buffer(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); - - if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device, - &src_resource->desc, dst->u.PlacedFootprint.Footprint.Format))) -@@ -3796,8 +3798,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - else if (src->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT - && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) - { -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_buffer(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); - - if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device, - &dst_resource->desc, src->u.PlacedFootprint.Footprint.Format))) -@@ -3825,8 +3827,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX - && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) - { -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); - - dst_format = dst_resource->format; - src_format = src_resource->format; -@@ -3859,10 +3861,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst, ID3D12Resource *src) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *dst_format, *src_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3885,8 +3887,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - - if (d3d12_resource_is_buffer(dst_resource)) - { -- assert(d3d12_resource_is_buffer(src_resource)); -- assert(src_resource->desc.Width == dst_resource->desc.Width); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); -+ VKD3D_ASSERT(src_resource->desc.Width == dst_resource->desc.Width); - - vk_buffer_copy.srcOffset = 0; - vk_buffer_copy.dstOffset = 0; -@@ -3900,10 +3902,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - dst_format = dst_resource->format; - src_format = src_resource->format; - -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -- assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); -- assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); -+ 
VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); -+ VKD3D_ASSERT(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); - - if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) - { -@@ -3929,7 +3931,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, - const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, - D3D12_TILE_COPY_FLAGS flags) -@@ -3940,11 +3942,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand - buffer, buffer_offset, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst, UINT dst_sub_resource_idx, - ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_format *src_format, *dst_format, *vk_format; - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3960,8 +3962,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - dst_resource = unsafe_impl_from_ID3D12Resource(dst); - src_resource = unsafe_impl_from_ID3D12Resource(src); - -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); - - d3d12_command_list_track_resource_usage(list, dst_resource); - d3d12_command_list_track_resource_usage(list, src_resource); -@@ -4007,10 +4009,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList6 *iface, - D3D12_PRIMITIVE_TOPOLOGY topology) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, topology %#x.\n", iface, topology); - -@@ -4021,11 +4023,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList6 *iface, - UINT viewport_count, const D3D12_VIEWPORT *viewports) - { - VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; - -@@ -4059,10 +4061,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo - VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList6 *iface, - UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -4087,10 +4089,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic - VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList6 *iface, - const FLOAT blend_factor[4]) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); -@@ -4099,10 +4101,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics - VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList6 *iface, - UINT stencil_ref) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); -@@ -4111,11 +4113,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC - VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList6 *iface, - ID3D12PipelineState *pipeline_state) - { - struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); - -@@ -4166,10 +4168,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA - return 0; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList6 *iface, - UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - bool have_aliasing_barriers = false, have_split_barriers = false; - const struct vkd3d_vk_device_procs *vk_procs; - const struct vkd3d_vulkan_info *vk_info; -@@ -4395,13 +4397,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList6 *iface, - ID3D12GraphicsCommandList *command_list) - { - FIXME("iface %p, command_list %p stub!\n", iface, command_list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList6 *iface, - UINT heap_count, ID3D12DescriptorHeap *const *heaps) - { - TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); -@@ -4427,10 +4429,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis - d3d12_command_list_invalidate_root_parameters(list, bind_point); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList6 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4438,10 +4440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G - unsafe_impl_from_ID3D12RootSignature(root_signature)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList6 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4457,9 +4459,9 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - struct d3d12_descriptor_heap *descriptor_heap; - struct d3d12_desc *desc; - -- assert(root_signature_get_descriptor_table(root_signature, index)); -+ VKD3D_ASSERT(root_signature_get_descriptor_table(root_signature, index)); - -- assert(index < ARRAY_SIZE(bindings->descriptor_tables)); -+ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->descriptor_tables)); - desc = d3d12_desc_from_gpu_handle(base_descriptor); - - if (bindings->descriptor_tables[index] == desc) -@@ -4480,10 +4482,10 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - bindings->descriptor_table_active_mask |= (uint64_t)1 << index; - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", - iface, root_parameter_index, debug_gpu_handle(base_descriptor)); -@@ -4492,10 +4494,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I - root_parameter_index, base_descriptor); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", - iface, root_parameter_index, debug_gpu_handle(base_descriptor)); -@@ -4517,10 +4519,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis - c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4529,10 +4531,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4541,10 +4543,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4553,10 +4555,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID - root_parameter_index, dst_offset, constant_count, data); - } - --static void 
STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4578,7 +4580,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - struct d3d12_resource *resource; - - root_parameter = root_signature_get_root_descriptor(root_signature, index); -- assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); -+ VKD3D_ASSERT(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); - - if (gpu_address) - { -@@ -4609,7 +4611,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); - VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - -- assert(index < ARRAY_SIZE(bindings->push_descriptors)); -+ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); - bindings->push_descriptors[index].u.cbv.vk_buffer = buffer_info.buffer; - bindings->push_descriptors[index].u.cbv.offset = buffer_info.offset; - bindings->push_descriptor_dirty_mask |= 1u << index; -@@ -4618,9 +4620,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4629,9 +4631,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4652,7 +4654,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - VkBufferView vk_buffer_view; - - root_parameter = root_signature_get_root_descriptor(root_signature, index); -- assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); -+ VKD3D_ASSERT(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); - - /* FIXME: Re-use buffer views. 
*/ - if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) -@@ -4682,7 +4684,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); - VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - -- assert(index < ARRAY_SIZE(bindings->push_descriptors)); -+ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); - bindings->push_descriptors[index].u.vk_buffer_view = vk_buffer_view; - bindings->push_descriptor_dirty_mask |= 1u << index; - bindings->push_descriptor_active_mask |= 1u << index; -@@ -4690,9 +4692,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4702,9 +4704,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4714,9 +4716,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4726,9 +4728,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4737,10 +4739,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV - root_parameter_index, address); - } - --static void 
STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList6 *iface, - const D3D12_INDEX_BUFFER_VIEW *view) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_resource *resource; - enum VkIndexType index_type; -@@ -4780,10 +4782,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics - view->BufferLocation - resource->gpu_address, index_type)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList6 *iface, - UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_null_resources *null_resources; - struct vkd3d_gpu_va_allocator *gpu_va_allocator; - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; -@@ -4832,10 +4834,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList6 *iface, - UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; - VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; - VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; -@@ -4897,11 +4899,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm - VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList6 *iface, - UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, - BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct d3d12_rtv_desc *rtv_desc; - const struct d3d12_dsv_desc *dsv_desc; - VkFormat prev_dsv_format; -@@ -5102,12 +5104,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList6 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, - UINT rect_count, const D3D12_RECT *rects) - { - const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; -- struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference ds_reference; -@@ -5151,10 +5153,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra - &clear_value, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList6 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference color_reference; -@@ -5481,11 +5483,11 @@ static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const st - return uint_view; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList6 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; - const struct vkd3d_resource_view *view; -@@ -5514,11 +5516,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList6 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; - const struct vkd3d_resource_view *view; -@@ -5547,16 +5549,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I - vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) - { - FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList6 *iface, 
- ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - VkQueryControlFlags flags = 0; -@@ -5583,10 +5585,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman - VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList6 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - -@@ -5628,12 +5630,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) - return sizeof(uint64_t); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList6 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, - ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) - { - const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i, first, count; -@@ -5709,10 +5711,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); - const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -5781,19 +5783,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList6 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList6 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, 
metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList5 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList6 *iface) - { - FIXME("iface %p stub!\n", iface); - } -@@ -5802,14 +5804,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN - STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); - STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList6 *iface, - ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, - UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) - { - struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); - struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); - struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -5908,7 +5910,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - d3d12_command_signature_decref(sig_impl); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5921,7 +5923,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5934,20 +5936,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, - FLOAT min, FLOAT max) - { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, - UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) - { - FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", - iface, sample_count, pixel_count, sample_positions); - } 
- --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, - ID3D12Resource *src_resource, UINT src_sub_resource_idx, - D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) -@@ -5959,16 +5961,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 - src_resource, src_sub_resource_idx, src_rect, format, mode); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList5 *iface, UINT mask) -+static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList6 *iface, UINT mask) - { - FIXME("iface %p, mask %#x stub!\n", iface, mask); - } - --static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList6 *iface, - UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, - const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *resource; - unsigned int i; - -@@ -5981,13 +5983,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList6 *iface, - ID3D12ProtectedResourceSession *protected_session) - { - FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList6 *iface, - UINT count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets, - const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags) - { -@@ -5995,74 +5997,78 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsC - count, render_targets, depth_stencil, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList5 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList6 *iface) - { - FIXME("iface %p stub!\n", iface); - } - --static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList6 *iface, - ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) - { - FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, - meta_command, parameters_data, (uintptr_t)data_size_in_bytes); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList6 *iface, - ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) - { - FIXME("iface %p, meta_command %p, 
parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface,
- meta_command, parameters_data, (uintptr_t)data_size_in_bytes);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface,
-+static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface,
- const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT count,
- const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs)
- {
- FIXME("iface %p, desc %p, count %u, postbuild_info_descs %p stub!\n", iface, desc, count, postbuild_info_descs);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(ID3D12GraphicsCommandList5 *iface,
-- const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
-+static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(
-+ ID3D12GraphicsCommandList6 *iface, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
- UINT structures_count, const D3D12_GPU_VIRTUAL_ADDRESS *src_structure_data)
- {
- FIXME("iface %p, desc %p, structures_count %u, src_structure_data %p stub!\n",
- iface, desc, structures_count, src_structure_data);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface,
-- D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data,
-- D3D12_GPU_VIRTUAL_ADDRESS src_structure_data,
-+static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface,
-+ D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, D3D12_GPU_VIRTUAL_ADDRESS src_structure_data,
- D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode)
- {
- FIXME("iface %p, dst_structure_data %#"PRIx64", src_structure_data %#"PRIx64", mode %u stub!\n",
- iface, dst_structure_data, src_structure_data, mode);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList5 *iface,
-+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList6 *iface,
- ID3D12StateObject *state_object)
- {
- FIXME("iface %p, state_object %p stub!\n", iface, state_object);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList5 *iface,
-+static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList6 *iface,
- const D3D12_DISPATCH_RAYS_DESC *desc)
- {
- FIXME("iface %p, desc %p stub!\n", iface, desc);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList5 *iface,
-+static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList6 *iface,
- D3D12_SHADING_RATE rate, const D3D12_SHADING_RATE_COMBINER *combiners)
- {
- FIXME("iface %p, rate %#x, combiners %p stub!\n", iface, rate, combiners);
- }
-
--static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList5 *iface,
-+static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList6 *iface,
- ID3D12Resource *rate_image)
- {
- FIXME("iface %p, rate_image %p stub!\n", iface, rate_image);
- }
-
--static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl =
-+static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh(ID3D12GraphicsCommandList6 *iface, UINT x, UINT y, UINT z)
-+{
-+ FIXME("iface %p, x %u, y %u, z %u stub!\n", iface, x, y, z);
-+}
-+
-+static const struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl =
- {
- /* IUnknown methods */
- d3d12_command_list_QueryInterface,
-@@ -6153,14 +6159,16 @@ static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl =
- /* ID3D12GraphicsCommandList5 methods */
- d3d12_command_list_RSSetShadingRate,
- d3d12_command_list_RSSetShadingRateImage,
-+ /* ID3D12GraphicsCommandList6 methods */
-+ d3d12_command_list_DispatchMesh,
- };
-
- static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface)
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl);
-- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface);
-+ VKD3D_ASSERT(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl);
-+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface);
- }
-
- static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device,
-@@ -6169,7 +6177,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d
- {
- HRESULT hr;
-
-- list->ID3D12GraphicsCommandList5_iface.lpVtbl = &d3d12_command_list_vtbl;
-+ list->ID3D12GraphicsCommandList6_iface.lpVtbl = &d3d12_command_list_vtbl;
- list->refcount = 1;
-
- list->type = type;
-@@ -6773,7 +6781,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu
- }
-
- vk_semaphore = fence->timeline_semaphore;
-- assert(vk_semaphore);
-+ VKD3D_ASSERT(vk_semaphore);
- }
- else
- {
-@@ -6846,7 +6854,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu
- return hr;
-
- vk_semaphore = fence->timeline_semaphore;
-- assert(vk_semaphore);
-+ VKD3D_ASSERT(vk_semaphore);
-
- return vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker,
- vk_semaphore, fence, timeline_value, vkd3d_queue);
-@@ -7015,7 +7023,7 @@ static HRESULT d3d12_command_queue_wait_locked(struct d3d12_command_queue *comma
- * until we have submitted, so the semaphore cannot be destroyed before the call to vkQueueSubmit. */
- vkd3d_mutex_unlock(&fence->mutex);
-
-- assert(fence->timeline_semaphore);
-+ VKD3D_ASSERT(fence->timeline_semaphore);
- timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
- timeline_submit_info.pNext = NULL;
- timeline_submit_info.waitSemaphoreValueCount = 1;
-@@ -7279,7 +7287,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *
-
- queue->is_flushing = true;
-
-- assert(queue->aux_op_queue.count == 0);
-+ VKD3D_ASSERT(queue->aux_op_queue.count == 0);
-
- while (queue->op_queue.count != 0)
- {
-@@ -7569,7 +7577,7 @@ struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12Co
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == &d3d12_command_signature_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_signature_vtbl);
- return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface);
- }
-
-diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c
-index 5fe381af90c..01841c89692 100644
---- a/libs/vkd3d/libs/vkd3d/device.c
-+++ b/libs/vkd3d/libs/vkd3d/device.c
-@@ -308,7 +308,7 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio
- for (i = 0; i < required_extension_count; ++i)
- {
- if (!has_extension(extensions, count, required_extensions[i]))
-- ERR("Required %s extension %s is not supported.\n",
-+ WARN("Required %s extension %s is not supported.\n",
- extension_type, debugstr_a(required_extensions[i]));
- ++extension_count;
- }
-@@ -336,12 +336,12 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio
- for (i = 0; i < user_extension_count; ++i)
- {
- if (!has_extension(extensions, count, user_extensions[i]))
-- ERR("Required user %s extension %s is not supported.\n",
-+ WARN("Required user %s extension %s is not supported.\n",
- extension_type, debugstr_a(user_extensions[i]));
- ++extension_count;
- }
-
-- assert(!optional_user_extension_count || user_extension_supported);
-+ VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported);
- for (i = 0; i < optional_user_extension_count; ++i)
- {
- if (has_extension(extensions, count, optional_user_extensions[i]))
-@@ -403,7 +403,7 @@ static unsigned int vkd3d_enable_extensions(const char *extensions[],
- {
- extension_count = vkd3d_append_extension(extensions, extension_count, user_extensions[i]);
- }
-- assert(!optional_user_extension_count || user_extension_supported);
-+ VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported);
- for (i = 0; i < optional_user_extension_count; ++i)
- {
- if (!user_extension_supported[i])
-@@ -584,7 +584,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance,
-
- if (!create_info->pfn_signal_event)
- {
-- ERR("Invalid signal event function pointer.\n");
-+ WARN("Invalid signal event function pointer.\n");
- return E_INVALIDARG;
- }
- if (!create_info->pfn_create_thread != !create_info->pfn_join_thread)
-@@ -594,7 +594,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance,
- }
- if (create_info->wchar_size != 2 && create_info->wchar_size != 4)
- {
-- ERR("Unexpected WCHAR size %zu.\n", create_info->wchar_size);
-+ WARN("Unexpected WCHAR size %zu.\n", create_info->wchar_size);
- return E_INVALIDARG;
- }
-
-@@ -1507,7 +1507,7 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct
- for (i = 0; i < ARRAY_SIZE(additional_formats); ++i)
- {
- format = vkd3d_get_format(device, additional_formats[i], false);
-- assert(format);
-+ VKD3D_ASSERT(format);
-
- VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties));
- if (!((properties.linearTilingFeatures | properties.optimalTilingFeatures) & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT))
-@@ -2155,7 +2155,7 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
- vkd3d_free(extensions);
- if (vr < 0)
- {
-- ERR("Failed to create Vulkan device, vr %d.\n", vr);
-+ WARN("Failed to create Vulkan device, vr %d.\n", vr);
- return hresult_from_vk_result(vr);
- }
-
-@@ -2547,7 +2547,7 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device)
- return;
- }
-
-- assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6);
-+ VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6);
- pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
- pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors,
- VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE);
-@@ -3119,8 +3119,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *i
- initial_pipeline_state, &object)))
- return hr;
-
-- return return_interface(&object->ID3D12GraphicsCommandList5_iface,
-- &IID_ID3D12GraphicsCommandList5, riid, command_list);
-+ return return_interface(&object->ID3D12GraphicsCommandList6_iface,
-+ &IID_ID3D12GraphicsCommandList6, riid, command_list);
- }
-
- /* Direct3D feature levels restrict which formats can be optionally supported. */
-@@ -5254,7 +5254,7 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface)
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == &d3d12_device_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_device_vtbl);
- return impl_from_ID3D12Device9(iface);
- }
-
-diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c
-index 7d7f40c0953..ac29088b9cb 100644
---- a/libs/vkd3d/libs/vkd3d/resource.c
-+++ b/libs/vkd3d/libs/vkd3d/resource.c
-@@ -312,7 +312,7 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface)
-
- TRACE("%p increasing refcount to %u.\n", heap, refcount);
-
-- assert(!heap->is_private);
-+ VKD3D_ASSERT(!heap->is_private);
-
- return refcount;
- }
-@@ -443,7 +443,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface)
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == &d3d12_heap_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_heap_vtbl);
- return impl_from_ID3D12Heap(iface);
- }
-
-@@ -950,8 +950,8 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device,
- bool tiled;
- HRESULT hr;
-
-- assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER);
-- assert(d3d12_resource_validate_desc(desc, device) == S_OK);
-+ VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER);
-+ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK);
-
- if (!desc->MipLevels)
- {
-@@ -1044,7 +1044,7 @@ static bool d3d12_resource_validate_box(const struct d3d12_resource *resource,
- depth = d3d12_resource_desc_get_depth(&resource->desc, mip_level);
-
- vkd3d_format = resource->format;
-- assert(vkd3d_format);
-+ VKD3D_ASSERT(vkd3d_format);
- width_mask = vkd3d_format->block_width - 1;
- height_mask = vkd3d_format->block_height - 1;
-
-@@ -1162,7 +1162,7 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3
-
- if (d3d12_resource_is_buffer(resource))
- {
-- assert(subresource_count == 1);
-+ VKD3D_ASSERT(subresource_count == 1);
-
- VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements));
- if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES)
-@@ -1381,7 +1381,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource2 *iface
-
- static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource)
- {
-- assert(resource->heap->map_ptr);
-+ VKD3D_ASSERT(resource->heap->map_ptr);
- return (uint8_t *)resource->heap->map_ptr + resource->heap_offset;
- }
-
-@@ -1771,7 +1771,7 @@ struct d3d12_resource *unsafe_impl_from_ID3D12Resource(ID3D12Resource *iface)
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl);
- return impl_from_ID3D12Resource(iface);
- }
-
-@@ -2165,7 +2165,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device,
-
- if (heap_offset > heap->desc.SizeInBytes || requirements.size > heap->desc.SizeInBytes - heap_offset)
- {
-- ERR("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n",
-+ WARN("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n",
- heap_offset, requirements.size, heap->desc.SizeInBytes);
- return E_INVALIDARG;
- }
-@@ -2406,7 +2406,7 @@ static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_
- {
- struct vkd3d_view *view;
-
-- assert(magic);
-+ VKD3D_ASSERT(magic);
-
- if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache)))
- {
-@@ -2536,7 +2536,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea
- writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view;
- break;
- default:
-- assert(false);
-+ VKD3D_ASSERT(false);
- break;
- }
- if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1)
-@@ -2725,7 +2725,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struc
- {
- struct d3d12_desc tmp;
-
-- assert(dst != src);
-+ VKD3D_ASSERT(dst != src);
-
- tmp.s.u.object = d3d12_desc_get_object_ref(src, device);
- descriptor_heap_write_atomic(dst_heap, dst, &tmp, device);
-@@ -2748,7 +2748,7 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12
- if (properties->storageTexelBufferOffsetSingleTexelAlignment
- && properties->uniformTexelBufferOffsetSingleTexelAlignment)
- {
-- assert(!vkd3d_format_is_compressed(format));
-+ VKD3D_ASSERT(!vkd3d_format_is_compressed(format));
- return min(format->byte_count, alignment);
- }
-
-@@ -2848,7 +2848,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device,
- return false;
- }
-
-- assert(d3d12_resource_is_buffer(resource));
-+ VKD3D_ASSERT(d3d12_resource_is_buffer(resource));
-
- return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer,
- format, offset * element_size, size * element_size, view);
-@@ -2979,7 +2979,7 @@ static VkComponentSwizzle swizzle_vk_component(const VkComponentMapping *compone
- break;
- }
-
-- assert(component != VK_COMPONENT_SWIZZLE_IDENTITY);
-+ VKD3D_ASSERT(component != VK_COMPONENT_SWIZZLE_IDENTITY);
- return component;
- }
-
-@@ -3511,8 +3511,8 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
- {
- const struct vkd3d_format *format;
-
-- assert(d3d12_resource_is_buffer(counter_resource));
-- assert(desc->u.Buffer.StructureByteStride);
-+ VKD3D_ASSERT(d3d12_resource_is_buffer(counter_resource));
-+ VKD3D_ASSERT(desc->u.Buffer.StructureByteStride);
-
- format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
- if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format,
-@@ -3632,7 +3632,7 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
- }
-
- resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address);
-- assert(d3d12_resource_is_buffer(resource));
-+ VKD3D_ASSERT(d3d12_resource_is_buffer(resource));
- return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format,
- gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view);
- }
-@@ -3904,7 +3904,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
- vkd3d_desc.layer_count = resource->desc.DepthOrArraySize;
- }
-
-- assert(d3d12_resource_is_texture(resource));
-+ VKD3D_ASSERT(d3d12_resource_is_texture(resource));
-
- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view))
- return;
-@@ -3990,7 +3990,7 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev
- }
- }
-
-- assert(d3d12_resource_is_texture(resource));
-+ VKD3D_ASSERT(d3d12_resource_is_texture(resource));
-
- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view))
- return;
-@@ -4559,7 +4559,7 @@ struct d3d12_query_heap *unsafe_impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == &d3d12_query_heap_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_query_heap_vtbl);
- return impl_from_ID3D12QueryHeap(iface);
- }
-
-diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c
-index bbfaaad47dd..0bdb7ea524d 100644
---- a/libs/vkd3d/libs/vkd3d/state.c
-+++ b/libs/vkd3d/libs/vkd3d/state.c
-@@ -194,7 +194,7 @@ struct d3d12_root_signature *unsafe_impl_from_ID3D12RootSignature(ID3D12RootSign
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == &d3d12_root_signature_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_root_signature_vtbl);
- return impl_from_ID3D12RootSignature(iface);
- }
-
-@@ -345,15 +345,93 @@ struct d3d12_root_signature_info
- unsigned int sampler_unbounded_range_count;
-
- size_t cost;
-+
-+ struct d3d12_root_signature_info_range
-+ {
-+ enum vkd3d_shader_descriptor_type type;
-+ unsigned int space;
-+ unsigned int base_idx;
-+ unsigned int count;
-+ D3D12_SHADER_VISIBILITY visibility;
-+ } *ranges;
-+ size_t range_count, range_capacity;
- };
-
-+static HRESULT d3d12_root_signature_info_add_range(struct d3d12_root_signature_info *info,
-+ enum vkd3d_shader_descriptor_type type, D3D12_SHADER_VISIBILITY visibility,
-+ unsigned int space, unsigned int base_idx, unsigned int count)
-+{
-+ struct d3d12_root_signature_info_range *range;
-+
-+ if (!vkd3d_array_reserve((void **)&info->ranges, &info->range_capacity, info->range_count + 1,
-+ sizeof(*info->ranges)))
-+ return E_OUTOFMEMORY;
-+
-+ range = &info->ranges[info->range_count++];
-+ range->type = type;
-+ range->space = space;
-+ range->base_idx = base_idx;
-+ range->count = count;
-+ range->visibility = visibility;
-+
-+ return S_OK;
-+}
-+
-+static int d3d12_root_signature_info_range_compare(const void *a, const void *b)
-+{
-+ const struct d3d12_root_signature_info_range *range_a = a, *range_b = b;
-+ int ret;
-+
-+ if ((ret = vkd3d_u32_compare(range_a->type, range_b->type)))
-+ return ret;
-+
-+ if ((ret = vkd3d_u32_compare(range_a->space, range_b->space)))
-+ return ret;
-+
-+ return vkd3d_u32_compare(range_a->base_idx, range_b->base_idx);
-+}
-+
-+static HRESULT d3d12_root_signature_info_range_validate(const struct d3d12_root_signature_info_range *ranges,
-+ unsigned int count, D3D12_SHADER_VISIBILITY visibility)
-+{
-+ const struct d3d12_root_signature_info_range *range, *next;
-+ unsigned int i = 0, j;
-+
-+ while (i < count)
-+ {
-+ range = &ranges[i];
-+
-+ for (j = i + 1; j < count; ++j)
-+ {
-+ next = &ranges[j];
-+
-+ if (range->visibility != D3D12_SHADER_VISIBILITY_ALL
-+ && next->visibility != D3D12_SHADER_VISIBILITY_ALL
-+ && range->visibility != next->visibility)
-+ continue;
-+
-+ if (range->type == next->type && range->space == next->space
-+ && range->base_idx + range->count > next->base_idx)
-+ return E_INVALIDARG;
-+
-+ break;
-+ }
-+
-+ i = j;
-+ }
-+
-+ return S_OK;
-+}
-+
- static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info,
-- const D3D12_ROOT_DESCRIPTOR_TABLE *table, bool use_array)
-+ const D3D12_ROOT_PARAMETER *param, bool use_array)
- {
- bool cbv_unbounded_range = false, srv_unbounded_range = false, uav_unbounded_range = false;
-+ const D3D12_ROOT_DESCRIPTOR_TABLE *table = &param->u.DescriptorTable;
- bool sampler_unbounded_range = false;
- bool unbounded = false;
- unsigned int i, count;
-+ HRESULT hr;
-
- for (i = 0; i < table->NumDescriptorRanges; ++i)
- {
-@@ -381,6 +459,12 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig
- }
-
- count = range->NumDescriptors;
-+
-+ if (FAILED(hr = d3d12_root_signature_info_add_range(info,
-+ vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType),
-+ param->ShaderVisibility, range->RegisterSpace, range->BaseShaderRegister, count)))
-+ return hr;
-+
- if (range->NumDescriptors == UINT_MAX)
- {
- unbounded = true;
-@@ -453,7 +537,7 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
- {
- case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE:
- if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info,
-- &p->u.DescriptorTable, use_array)))
-+ p, use_array)))
- return hr;
- ++info->cost;
- break;
-@@ -463,23 +547,41 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
- ++info->cbv_count;
- ++info->binding_count;
- info->cost += 2;
-+ if (FAILED(hr = d3d12_root_signature_info_add_range(info,
-+ VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility,
-+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1)))
-+ return hr;
- break;
-+
- case D3D12_ROOT_PARAMETER_TYPE_SRV:
- ++info->root_descriptor_count;
- ++info->srv_count;
- ++info->binding_count;
- info->cost += 2;
-+ if (FAILED(hr = d3d12_root_signature_info_add_range(info,
-+ VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, p->ShaderVisibility,
-+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1)))
-+ return hr;
- break;
-+
- case D3D12_ROOT_PARAMETER_TYPE_UAV:
- ++info->root_descriptor_count;
- ++info->uav_count;
- ++info->binding_count;
- info->cost += 2;
-+ if (FAILED(hr = d3d12_root_signature_info_add_range(info,
-+ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, p->ShaderVisibility,
-+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1)))
-+ return hr;
- break;
-
- case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS:
- ++info->root_constant_count;
- info->cost += p->u.Constants.Num32BitValues;
-+ if (FAILED(hr = d3d12_root_signature_info_add_range(info,
-+ VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility,
-+ p->u.Constants.RegisterSpace, p->u.Constants.ShaderRegister, 1)))
-+ return hr;
- break;
-
- default:
-@@ -491,6 +593,30 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i
- info->binding_count += desc->NumStaticSamplers;
- info->sampler_count += desc->NumStaticSamplers;
-
-+ for (i = 0; i < desc->NumStaticSamplers; ++i)
-+ {
-+ const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i];
-+
-+ if (FAILED(hr = d3d12_root_signature_info_add_range(info,
-+ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->ShaderVisibility,
-+ s->RegisterSpace, s->ShaderRegister, 1)))
-+ return hr;
-+ }
-+
-+ qsort(info->ranges, info->range_count, sizeof(*info->ranges),
-+ d3d12_root_signature_info_range_compare);
-+
-+ for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i)
-+ {
-+ if (FAILED(hr = d3d12_root_signature_info_range_validate(info->ranges, info->range_count, i)))
-+ return hr;
-+ }
-+
-+ vkd3d_free(info->ranges);
-+ info->ranges = NULL;
-+ info->range_count = 0;
-+ info->range_capacity = 0;
-+
- return S_OK;
- }
-
-@@ -512,7 +638,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat
- if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS)
- continue;
-
-- assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL);
-+ VKD3D_ASSERT(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL);
- push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL
- : stage_flags_from_visibility(p->ShaderVisibility);
- push_constants[p->ShaderVisibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t);
-@@ -963,20 +1089,6 @@ static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_r
- descriptor_offset, is_buffer, shader_visibility, context);
- }
-
--static int compare_register_range(const void *a, const void *b)
--{
-- const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b;
-- int ret;
--
-- if ((ret = vkd3d_u32_compare(range_a->type, range_b->type)))
-- return ret;
--
-- if ((ret = vkd3d_u32_compare(range_a->register_space, range_b->register_space)))
-- return ret;
--
-- return vkd3d_u32_compare(range_a->base_register_idx, range_b->base_register_idx);
--}
--
- static int compare_descriptor_range(const void *a, const void *b)
- {
- const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b;
-@@ -991,25 +1103,6 @@ static int compare_descriptor_range(const void *a, const void *b)
- return (range_a->descriptor_count == UINT_MAX) - (range_b->descriptor_count == UINT_MAX);
- }
-
--static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descriptor_table_range *ranges,
-- unsigned int count)
--{
-- const struct d3d12_root_descriptor_table_range *range, *prev;
-- unsigned int i;
--
-- for (i = 1; i < count; ++i)
-- {
-- range = &ranges[i];
-- prev = &ranges[i - 1];
--
-- if (range->type == prev->type && range->register_space == prev->register_space
-- && range->base_register_idx - prev->base_register_idx < prev->descriptor_count)
-- return E_INVALIDARG;
-- }
--
-- return S_OK;
--}
--
- static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature,
- const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info,
- struct vkd3d_descriptor_set_context *context)
-@@ -1070,10 +1163,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo
- offset += range->NumDescriptors;
- }
-
-- qsort(table->ranges, range_count, sizeof(*table->ranges), compare_register_range);
-- if (FAILED(hr = validate_descriptor_register_ranges(table->ranges, range_count)))
-- return hr;
--
- qsort(table->ranges, range_count, sizeof(*table->ranges), compare_descriptor_range);
-
- for (j = 0; j < range_count; ++j)
-@@ -1226,7 +1315,7 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa
- unsigned int i;
- HRESULT hr;
-
-- assert(root_signature->static_sampler_count == desc->NumStaticSamplers);
-+ VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers);
- for (i = 0; i < desc->NumStaticSamplers; ++i)
- {
- const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i];
-@@ -1612,7 +1701,7 @@ static HRESULT vkd3d_render_pass_cache_create_pass_locked(struct vkd3d_render_pa
-
- have_depth_stencil = key->depth_enable || key->stencil_enable;
- rt_count = have_depth_stencil ? key->attachment_count - 1 : key->attachment_count;
-- assert(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT);
-+ VKD3D_ASSERT(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT);
-
- for (index = 0, attachment_index = 0; index < rt_count; ++index)
- {
-@@ -2152,7 +2241,7 @@ struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12Pipeline
- {
- if (!iface)
- return NULL;
-- assert(iface->lpVtbl == &d3d12_pipeline_state_vtbl);
-+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_pipeline_state_vtbl);
- return impl_from_ID3D12PipelineState(iface);
- }
-
-@@ -2308,7 +2397,7 @@ static HRESULT d3d12_pipeline_state_init_uav_counters(struct d3d12_pipeline_stat
- unsigned int i, j;
- HRESULT hr;
-
-- assert(vkd3d_popcount(stage_flags) == 1);
-+ VKD3D_ASSERT(vkd3d_popcount(stage_flags) == 1);
-
- for (i = 0; i < shader_info->descriptor_count; ++i)
- {
-@@ -2923,7 +3012,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass(
-
- if (dsv_format)
- {
-- assert(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask);
-+ VKD3D_ASSERT(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask);
- key.depth_enable = graphics->ds_desc.depthTestEnable;
- key.stencil_enable = graphics->ds_desc.stencilTestEnable;
- key.depth_stencil_write = graphics->ds_desc.depthWriteEnable
-@@ -2940,7 +3029,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass(
- if (key.attachment_count != ARRAY_SIZE(key.vk_formats))
- key.vk_formats[ARRAY_SIZE(key.vk_formats) - 1] = VK_FORMAT_UNDEFINED;
- for (i = key.attachment_count; i < ARRAY_SIZE(key.vk_formats); ++i)
-- assert(key.vk_formats[i] == VK_FORMAT_UNDEFINED);
-+ VKD3D_ASSERT(key.vk_formats[i] == VK_FORMAT_UNDEFINED);
-
- key.padding = 0;
- key.sample_count = graphics->ms_desc.rasterizationSamples;
-@@ -3488,7 +3577,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
- graphics->ms_desc.pSampleMask = NULL;
- if (desc->sample_mask != ~0u)
- {
-- assert(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask));
-+ VKD3D_ASSERT(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask));
- graphics->sample_mask[0] = desc->sample_mask;
- graphics->sample_mask[1] = 0xffffffffu;
- graphics->ms_desc.pSampleMask = graphics->sample_mask;
-@@ -3781,7 +3870,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta
- .pDynamicStates = dynamic_states,
- };
-
-- assert(d3d12_pipeline_state_is_graphics(state));
-+ VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(state));
-
- memset(&pipeline_key, 0, sizeof(pipeline_key));
- pipeline_key.topology = topology;
-diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c
-index 11029c9f5f9..831dc07af56 100644
---- a/libs/vkd3d/libs/vkd3d/utils.c
-+++ b/libs/vkd3d/libs/vkd3d/utils.c
-@@ -331,7 +331,7 @@ static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device
-
- if (j >= current_list->format_count)
- {
-- assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT);
-+ VKD3D_ASSERT(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT);
- current_list->vk_formats[current_list->format_count++] = vk_format;
- }
- }
-@@ -427,7 +427,7 @@ static const struct vkd3d_format *vkd3d_get_depth_stencil_format(const struct d3
- const struct vkd3d_format *formats;
- unsigned int i;
-
-- assert(device);
-+ VKD3D_ASSERT(device);
- formats = device->depth_stencil_formats;
-
- for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i)
-diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c
-index c7431bd821b..9eccec111c7 100644
---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c
-+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c
-@@ -38,12 +38,12 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info,
- }
- if (!create_info->instance && !create_info->instance_create_info)
- {
-- ERR("Instance or instance create info is required.\n");
-+ WARN("Instance or instance create info is required.\n");
- return E_INVALIDARG;
- }
- if (create_info->instance && create_info->instance_create_info)
- {
-- ERR("Instance and instance create info are mutually exclusive parameters.\n");
-+ WARN("Instance and instance create info are mutually exclusive parameters.\n");
- return E_INVALIDARG;
- }
-
-@@ -153,7 +153,7 @@ static const D3D12_ROOT_SIGNATURE_DESC * STDMETHODCALLTYPE d3d12_root_signature_
-
- TRACE("iface %p.\n", iface);
-
-- assert(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0);
-+ VKD3D_ASSERT(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0);
- return &deserializer->desc.d3d12.u.Desc_1_0;
- }
-
-@@ -354,7 +354,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_Get
- }
- }
-
-- assert(deserializer->other_desc.d3d12.Version == version);
-+ VKD3D_ASSERT(deserializer->other_desc.d3d12.Version == version);
- *desc = &deserializer->other_desc.d3d12;
- return S_OK;
- }
-diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
-index cae8aa69c8b..a4bd2202f39 100644
---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h
-+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h
-@@ -787,8 +787,8 @@ extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[];
- static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(
- VkDescriptorType type)
- {
-- assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
-- assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT);
-+ VKD3D_ASSERT(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
-+ VKD3D_ASSERT(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT);
-
- return vk_descriptor_set_index_table[type];
- }
-@@ -1232,7 +1232,7 @@ enum vkd3d_pipeline_bind_point
- /* ID3D12CommandList */
- struct d3d12_command_list
- {
-- ID3D12GraphicsCommandList5 ID3D12GraphicsCommandList5_iface;
-+ ID3D12GraphicsCommandList6 ID3D12GraphicsCommandList6_iface;
- unsigned int refcount;
-
- D3D12_COMMAND_LIST_TYPE type;
-@@ -1768,7 +1768,7 @@ static inline void vkd3d_prepend_struct(void *header, void *structure)
- const void *next;
- } *vkd3d_header = header, *vkd3d_structure = structure;
-
-- assert(!vkd3d_structure->next);
-+ VKD3D_ASSERT(!vkd3d_structure->next);
- vkd3d_structure->next = vkd3d_header->next;
- vkd3d_header->next = vkd3d_structure;
- }
--- 
-2.43.0
-